1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 104 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 117 { 118 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 119 PC_IS *pcis = (PC_IS*)pc->data; 120 VecType impVecType; 121 PetscInt n_constraints,n_R,old_size; 122 PetscErrorCode ierr; 123 124 PetscFunctionBegin; 125 if (!pcbddc->ConstraintMatrix) { 126 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 127 } 128 /* get sizes */ 129 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 130 n_R = pcis->n-pcbddc->n_actual_vertices; 131 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 132 /* local work vectors (try to avoid unneeded work)*/ 133 /* R nodes */ 134 old_size = -1; 135 if (pcbddc->vec1_R) { 136 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 137 } 138 if (n_R != old_size) { 139 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 140 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 141 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 143 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 144 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 145 } 146 /* local primal dofs */ 147 old_size = -1; 148 if (pcbddc->vec1_P) { 149 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 150 } 151 if (pcbddc->local_primal_size != old_size) { 152 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 153 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 155 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 156 } 157 /* local explicit constraints */ 158 old_size = -1; 159 if (pcbddc->vec1_C) { 160 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 161 } 162 if (n_constraints && n_constraints != old_size) { 163 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 164 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 166 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNCT__ 172 #define __FUNCT__ "PCBDDCSetUpCorrection" 173 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 174 { 175 PetscErrorCode ierr; 176 /* pointers to pcis and pcbddc */ 177 PC_IS* pcis = (PC_IS*)pc->data; 178 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 179 /* submatrices of local problem */ 180 Mat A_RV,A_VR,A_VV; 181 /* working matrices */ 182 Mat M1,M2,M3,C_CR; 183 /* working vectors */ 184 Vec vec1_C,vec2_C,vec1_V,vec2_V; 185 /* additional working stuff */ 186 IS is_aux; 187 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 188 const PetscScalar *array,*row_cmat_values; 189 const PetscInt *row_cmat_indices,*idx_R_local; 190 PetscInt *idx_V_B,*auxindices; 191 PetscInt n_vertices,n_constraints,size_of_constraint; 192 PetscInt i,j,n_R,n_D,n_B; 193 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 194 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 195 MatType impMatType; 196 /* some shortcuts to scalars */ 197 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 198 /* for debugging purposes */ 199 PetscReal *coarsefunctions_errors,*constraints_errors; 200 201 PetscFunctionBegin; 202 /* get number of vertices (corners plus constraints with change of basis) 203 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 204 n_vertices = pcbddc->n_actual_vertices; 205 n_constraints = pcbddc->local_primal_size-n_vertices; 206 /* Set Non-overlapping dimensions */ 207 n_B = pcis->n_B; n_D = pcis->n - n_B; 208 n_R = pcis->n-n_vertices; 209 210 /* Set types for local objects needed by BDDC precondtioner */ 211 impMatType = MATSEQDENSE; 212 213 /* Allocating some extra storage just to be safe */ 214 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 215 for (i=0;i<pcis->n;i++) auxindices[i]=i; 216 217 /* vertices in boundary numbering */ 218 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 219 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 220 if (i != n_vertices) { 221 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 222 } 223 224 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 225 if (n_constraints) { 226 /* see if we can save some allocations */ 227 if (pcbddc->local_auxmat2) { 228 PetscInt on_R,on_constraints; 229 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 230 if (on_R != n_R || on_constraints != n_constraints) { 231 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 232 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 233 } 234 } 235 /* work vectors */ 236 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 238 /* auxiliary matrices */ 239 if (!pcbddc->local_auxmat2) { 240 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 241 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 242 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 243 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 244 } 245 246 /* Extract constraints on R nodes: C_{CR} */ 247 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 248 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 249 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 250 251 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 252 for (i=0;i<n_constraints;i++) { 253 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 254 /* Get row of constraint matrix in R numbering */ 255 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 256 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 257 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 259 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 260 /* Solve for row of constraint matrix in R numbering */ 261 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 262 /* Set values in local_auxmat2 */ 263 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 264 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 265 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 } 267 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 268 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 270 271 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 272 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 273 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 274 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 275 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 276 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 277 ierr = MatSetUp(M1);CHKERRQ(ierr); 278 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 279 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 280 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 281 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 282 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 283 ierr = MatDestroy(&M2);CHKERRQ(ierr); 284 ierr = MatDestroy(&M3);CHKERRQ(ierr); 285 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 286 if (!pcbddc->local_auxmat1) { 287 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 288 } else { 289 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } 291 } 292 293 /* Get submatrices from subdomain matrix */ 294 if (n_vertices) { 295 PetscInt ibs,mbs; 296 PetscBool issbaij; 297 Mat newmat; 298 299 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 300 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 301 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 302 if (ibs != mbs) { /* need to convert to SEQAIJ */ 303 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 304 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 307 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 308 } else { 309 /* this is safe */ 310 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 311 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 312 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 313 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 314 /* which of the two approaches is faster? */ 315 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 316 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 317 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 319 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 320 } else { 321 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 322 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 } 324 } 325 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 326 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 327 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 328 } 329 330 /* Matrix of coarse basis functions (local) */ 331 if (pcbddc->coarse_phi_B) { 332 PetscInt on_B,on_primal; 333 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 334 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 335 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 336 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 337 } 338 } 339 if (pcbddc->coarse_phi_D) { 340 PetscInt on_D,on_primal; 341 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 342 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 343 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 344 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 345 } 346 } 347 if (!pcbddc->coarse_phi_B) { 348 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 349 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 350 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 351 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 352 } 353 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 354 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 355 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 356 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 357 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 358 } 359 360 if (pcbddc->dbg_flag) { 361 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 362 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 363 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 364 } 365 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 366 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 367 368 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 369 370 /* vertices */ 371 for (i=0;i<n_vertices;i++) { 372 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 373 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 374 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 375 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 376 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 378 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 379 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 380 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 382 if (n_constraints) { 383 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 384 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 386 } 387 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 388 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 389 390 /* Set values in coarse basis function and subdomain part of coarse_mat */ 391 /* coarse basis functions */ 392 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 393 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 394 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 396 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 397 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 399 if (pcbddc->switch_static || pcbddc->dbg_flag) { 400 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 401 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 403 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 } 406 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 407 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 408 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 if (n_constraints) { 411 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 412 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 } 415 416 /* check */ 417 if (pcbddc->dbg_flag) { 418 /* assemble subdomain vector on local nodes */ 419 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 420 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 421 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 425 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 426 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 427 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 428 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 429 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 430 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 if (n_constraints) { 432 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 433 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 } 436 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 439 /* check saddle point solution */ 440 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 441 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 443 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 444 /* shift by the identity matrix */ 445 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 449 } 450 } 451 452 /* constraints */ 453 for (i=0;i<n_constraints;i++) { 454 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 455 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 456 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 457 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 458 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 459 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 460 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 461 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 462 if (n_vertices) { 463 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 464 } 465 /* Set values in coarse basis function and subdomain part of coarse_mat */ 466 /* coarse basis functions */ 467 j = i+n_vertices; /* don't touch this! */ 468 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 469 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 472 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 473 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 if (pcbddc->switch_static || pcbddc->dbg_flag) { 475 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 478 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 479 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 } 481 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 482 if (n_vertices) { 483 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 484 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 485 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 } 487 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 488 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 491 if (pcbddc->dbg_flag) { 492 /* assemble subdomain vector on nodes */ 493 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 494 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 495 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 499 /* assemble subdomain vector of lagrange multipliers */ 500 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 501 if (n_vertices) { 502 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 503 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 504 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 } 506 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 507 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 510 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 512 /* check saddle point solution */ 513 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 514 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 516 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 517 /* shift by the identity matrix */ 518 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 519 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 522 } 523 } 524 /* call assembling routines for local coarse basis */ 525 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 526 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 if (pcbddc->switch_static || pcbddc->dbg_flag) { 528 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 } 531 532 /* compute other basis functions for non-symmetric problems */ 533 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 534 if (!setsym || (setsym && !issym)) { 535 if (!pcbddc->coarse_psi_B) { 536 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 537 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 539 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 540 } 541 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 542 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 543 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 544 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 545 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 546 } 547 for (i=0;i<pcbddc->local_primal_size;i++) { 548 if (n_constraints) { 549 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 550 for (j=0;j<n_constraints;j++) { 551 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 552 } 553 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 554 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 555 } 556 if (i<n_vertices) { 557 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 558 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 559 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 560 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 561 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 562 if (n_constraints) { 563 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 564 } 565 } else { 566 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 567 } 568 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 569 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 570 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 573 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 574 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 if (i<n_vertices) { 576 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 577 } 578 if (pcbddc->switch_static || pcbddc->dbg_flag) { 579 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 580 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 582 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 583 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 } 585 586 if (pcbddc->dbg_flag) { 587 /* assemble subdomain vector on nodes */ 588 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 589 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 590 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 591 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 if (i<n_vertices) { 593 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 594 } 595 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 596 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 597 /* assemble subdomain vector of lagrange multipliers */ 598 for (j=0;j<pcbddc->local_primal_size;j++) { 599 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 600 } 601 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 602 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 603 /* check saddle point solution */ 604 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 605 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 607 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 608 /* shift by the identity matrix */ 609 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 610 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 611 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 613 } 614 } 615 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 616 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 if (pcbddc->switch_static || pcbddc->dbg_flag) { 618 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 } 621 } 622 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 623 /* Checking coarse_sub_mat and coarse basis functios */ 624 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 625 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 626 if (pcbddc->dbg_flag) { 627 Mat coarse_sub_mat; 628 Mat AUXMAT,TM1,TM2,TM3,TM4; 629 Mat coarse_phi_D,coarse_phi_B; 630 Mat coarse_psi_D,coarse_psi_B; 631 Mat A_II,A_BB,A_IB,A_BI; 632 MatType checkmattype=MATSEQAIJ; 633 PetscReal real_value; 634 635 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 636 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 637 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 638 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 639 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 640 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 641 if (pcbddc->coarse_psi_B) { 642 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 643 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 644 } 645 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 646 647 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 648 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 649 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 650 if (pcbddc->coarse_psi_B) { 651 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 652 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 653 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 654 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 655 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 656 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 657 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 658 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 659 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 660 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 661 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 662 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 663 } else { 664 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 665 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 666 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 667 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 668 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 669 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 670 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 672 } 673 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 674 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 675 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 676 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 677 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 678 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 679 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 680 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 684 for (i=0;i<pcbddc->local_primal_size;i++) { 685 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 686 } 687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 688 for (i=0;i<pcbddc->local_primal_size;i++) { 689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 690 } 691 if (pcbddc->coarse_psi_B) { 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 693 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 695 } 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 697 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 699 } 700 } 701 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 702 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 703 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 704 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 705 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 706 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 707 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 708 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 709 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 710 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 711 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 712 if (pcbddc->coarse_psi_B) { 713 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 714 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 715 } 716 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 717 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 718 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 719 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 720 } 721 /* free memory */ 722 if (n_vertices) { 723 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 724 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 725 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 726 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 727 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 728 } 729 if (n_constraints) { 730 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 731 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 732 ierr = MatDestroy(&M1);CHKERRQ(ierr); 733 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 734 } 735 ierr = PetscFree(auxindices);CHKERRQ(ierr); 736 /* get back data */ 737 *coarse_submat_vals_n = coarse_submat_vals; 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 743 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 744 { 745 PC_IS* pcis = (PC_IS*)(pc->data); 746 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 747 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 748 PetscBool issbaij,isbaij; 749 /* manage repeated solves */ 750 MatReuse reuse; 751 MatStructure matstruct; 752 PetscErrorCode ierr; 753 754 PetscFunctionBegin; 755 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 756 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 757 } 758 /* get mat flags */ 759 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 760 reuse = MAT_INITIAL_MATRIX; 761 if (pc->setupcalled) { 762 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 763 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 764 if (matstruct == SAME_NONZERO_PATTERN) { 765 reuse = MAT_REUSE_MATRIX; 766 } else { 767 reuse = MAT_INITIAL_MATRIX; 768 } 769 } 770 if (reuse == MAT_INITIAL_MATRIX) { 771 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 772 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 773 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 774 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 775 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 776 } 777 778 /* transform local matrices if needed */ 779 if (pcbddc->use_change_of_basis) { 780 Mat change_mat_all; 781 PetscScalar *row_cmat_values; 782 PetscInt *row_cmat_indices; 783 PetscInt *nnz,*is_indices,*temp_indices; 784 PetscInt i,j,k,n_D,n_B; 785 786 /* Get Non-overlapping dimensions */ 787 n_B = pcis->n_B; 788 n_D = pcis->n-n_B; 789 790 /* compute nonzero structure of change of basis on all local nodes */ 791 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 792 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 793 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 794 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 795 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 796 k=1; 797 for (i=0;i<n_B;i++) { 798 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 799 nnz[is_indices[i]]=j; 800 if (k < j) k = j; 801 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 802 } 803 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 804 /* assemble change of basis matrix on the whole set of local dofs */ 805 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 806 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 807 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 808 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 809 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 810 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 811 for (i=0;i<n_D;i++) { 812 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 813 } 814 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 815 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 816 for (i=0;i<n_B;i++) { 817 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 818 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 819 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 820 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 821 } 822 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 824 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 825 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 826 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 827 if (!issbaij && !isbaij) { 828 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 829 } else { 830 Mat work_mat; 831 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 832 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 833 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 834 } 835 /* 836 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 837 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 838 */ 839 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 840 ierr = PetscFree(nnz);CHKERRQ(ierr); 841 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 842 } else { 843 /* without change of basis, the local matrix is unchanged */ 844 if (!pcbddc->local_mat) { 845 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 846 pcbddc->local_mat = matis->A; 847 } 848 } 849 850 /* get submatrices */ 851 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 852 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 853 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 854 if (!issbaij) { 855 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 856 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 857 } else { 858 Mat newmat; 859 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 860 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 861 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 862 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 863 } 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 869 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 870 { 871 PC_IS* pcis = (PC_IS*)(pc->data); 872 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 873 IS is_aux1,is_aux2; 874 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 875 PetscInt n_vertices,i,j,n_R,n_D,n_B; 876 PetscInt vbs,bs; 877 PetscBT bitmask; 878 PetscErrorCode ierr; 879 880 PetscFunctionBegin; 881 /* 882 No need to setup local scatters if 883 - primal space is unchanged 884 AND 885 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 886 AND 887 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 888 */ 889 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 890 PetscFunctionReturn(0); 891 } 892 /* destroy old objects */ 893 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 894 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 895 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 896 /* Set Non-overlapping dimensions */ 897 n_B = pcis->n_B; n_D = pcis->n - n_B; 898 n_vertices = pcbddc->n_actual_vertices; 899 /* create auxiliary bitmask */ 900 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 901 for (i=0;i<n_vertices;i++) { 902 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 903 } 904 905 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 906 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 907 for (i=0, n_R=0; i<pcis->n; i++) { 908 if (!PetscBTLookup(bitmask,i)) { 909 idx_R_local[n_R] = i; 910 n_R++; 911 } 912 } 913 914 /* Block code */ 915 vbs = 1; 916 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 917 if (bs>1 && !(n_vertices%bs)) { 918 PetscBool is_blocked = PETSC_TRUE; 919 PetscInt *vary; 920 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 921 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 922 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 923 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 924 for (i=0; i<n_vertices; i++) { 925 if (vary[i]!=0 && vary[i]!=bs) { 926 is_blocked = PETSC_FALSE; 927 break; 928 } 929 } 930 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 931 vbs = bs; 932 for (i=0;i<n_R/vbs;i++) { 933 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 934 } 935 } 936 ierr = PetscFree(vary);CHKERRQ(ierr); 937 } 938 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 939 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 940 941 /* print some info if requested */ 942 if (pcbddc->dbg_flag) { 943 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 945 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 948 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 949 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 951 } 952 953 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 954 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 955 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 956 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 957 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 958 for (i=0; i<n_D; i++) { 959 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 960 } 961 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 962 for (i=0, j=0; i<n_R; i++) { 963 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 964 aux_array1[j++] = i; 965 } 966 } 967 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 968 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 969 for (i=0, j=0; i<n_B; i++) { 970 if (!PetscBTLookup(bitmask,is_indices[i])) { 971 aux_array2[j++] = i; 972 } 973 } 974 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 975 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 976 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 977 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 978 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 979 980 if (pcbddc->switch_static || pcbddc->dbg_flag) { 981 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 982 for (i=0, j=0; i<n_R; i++) { 983 if (PetscBTLookup(bitmask,idx_R_local[i])) { 984 aux_array1[j++] = i; 985 } 986 } 987 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 988 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 989 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 990 } 991 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 992 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 997 #undef __FUNCT__ 998 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 999 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1000 { 1001 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1002 PC_IS *pcis = (PC_IS*)pc->data; 1003 PC pc_temp; 1004 Mat A_RR; 1005 MatStructure matstruct; 1006 MatReuse reuse; 1007 PetscScalar m_one = -1.0; 1008 PetscReal value; 1009 PetscInt n_D,n_R,ibs,mbs; 1010 PetscBool use_exact,use_exact_reduced,issbaij; 1011 PetscErrorCode ierr; 1012 /* prefixes stuff */ 1013 char dir_prefix[256],neu_prefix[256],str_level[3]; 1014 size_t len; 1015 1016 PetscFunctionBegin; 1017 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1018 1019 /* compute prefixes */ 1020 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1021 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1022 if (!pcbddc->current_level) { 1023 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1024 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1025 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1026 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1027 } else { 1028 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1029 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1030 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1031 len -= 15; /* remove "pc_bddc_coarse_" */ 1032 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1033 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1034 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1035 *(dir_prefix+len)='\0'; 1036 *(neu_prefix+len)='\0'; 1037 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1038 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1039 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1040 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1041 } 1042 1043 /* DIRICHLET PROBLEM */ 1044 /* Matrix for Dirichlet problem is pcis->A_II */ 1045 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1046 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1047 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1048 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1049 /* default */ 1050 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1051 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1052 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1053 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1054 if (issbaij) { 1055 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1056 } else { 1057 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1058 } 1059 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1060 } 1061 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1062 /* Allow user's customization */ 1063 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1064 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1065 if (!n_D) { 1066 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1067 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1068 } 1069 /* Set Up KSP for Dirichlet problem of BDDC */ 1070 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1071 /* set ksp_D into pcis data */ 1072 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1073 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1074 pcis->ksp_D = pcbddc->ksp_D; 1075 1076 /* NEUMANN PROBLEM */ 1077 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1078 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1079 if (pcbddc->ksp_R) { /* already created ksp */ 1080 PetscInt nn_R; 1081 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1082 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1083 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1084 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1085 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1086 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1087 reuse = MAT_INITIAL_MATRIX; 1088 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1089 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1090 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1091 reuse = MAT_INITIAL_MATRIX; 1092 } else { /* safe to reuse the matrix */ 1093 reuse = MAT_REUSE_MATRIX; 1094 } 1095 } 1096 /* last check */ 1097 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1098 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1099 reuse = MAT_INITIAL_MATRIX; 1100 } 1101 } else { /* first time, so we need to create the matrix */ 1102 reuse = MAT_INITIAL_MATRIX; 1103 } 1104 /* extract A_RR */ 1105 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1106 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1107 if (ibs != mbs) { 1108 Mat newmat; 1109 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1110 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1111 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1112 } else { 1113 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1114 } 1115 if (!pcbddc->ksp_R) { /* create object if not present */ 1116 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1117 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1118 /* default */ 1119 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1120 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1121 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1122 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1123 if (issbaij) { 1124 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1125 } else { 1126 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1127 } 1128 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1129 } 1130 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1131 /* Allow user's customization */ 1132 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1133 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1134 if (!n_R) { 1135 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1136 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1137 } 1138 /* Set Up KSP for Neumann problem of BDDC */ 1139 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1140 1141 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1142 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1143 /* Dirichlet */ 1144 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1145 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1146 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1147 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1148 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1149 /* need to be adapted? */ 1150 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1151 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1152 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1153 /* print info */ 1154 if (pcbddc->dbg_flag) { 1155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1156 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1157 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1159 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1160 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1161 } 1162 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1163 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1164 } 1165 1166 /* Neumann */ 1167 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1168 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1169 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1170 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1171 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1172 /* need to be adapted? */ 1173 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1174 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1175 /* print info */ 1176 if (pcbddc->dbg_flag) { 1177 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1178 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1179 } 1180 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1181 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1182 } 1183 } 1184 /* free Neumann problem's matrix */ 1185 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1191 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1192 { 1193 PetscErrorCode ierr; 1194 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1195 1196 PetscFunctionBegin; 1197 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1198 if (pcbddc->local_auxmat1) { 1199 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1200 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1201 } 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1207 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1208 { 1209 PetscErrorCode ierr; 1210 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1211 PC_IS* pcis = (PC_IS*) (pc->data); 1212 const PetscScalar zero = 0.0; 1213 1214 PetscFunctionBegin; 1215 /* Application of PHI^T (or PSI^T) */ 1216 if (pcbddc->coarse_psi_B) { 1217 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1218 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1219 } else { 1220 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1221 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1222 } 1223 /* Scatter data of coarse_rhs */ 1224 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1225 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1226 1227 /* Local solution on R nodes */ 1228 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1229 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1231 if (pcbddc->switch_static) { 1232 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1233 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 } 1235 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1236 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1237 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 if (pcbddc->switch_static) { 1240 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1241 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1242 } 1243 1244 /* Coarse solution */ 1245 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1247 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1248 } 1249 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1250 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1251 1252 /* Sum contributions from two levels */ 1253 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1254 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1260 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1261 { 1262 PetscErrorCode ierr; 1263 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1264 1265 PetscFunctionBegin; 1266 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1272 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1273 { 1274 PetscErrorCode ierr; 1275 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1276 1277 PetscFunctionBegin; 1278 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 /* uncomment for testing purposes */ 1283 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1286 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1287 { 1288 PetscErrorCode ierr; 1289 PC_IS* pcis = (PC_IS*)(pc->data); 1290 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1291 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1292 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1293 MatType impMatType=MATSEQAIJ; 1294 /* one and zero */ 1295 PetscScalar one=1.0,zero=0.0; 1296 /* space to store constraints and their local indices */ 1297 PetscScalar *temp_quadrature_constraint; 1298 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1299 /* iterators */ 1300 PetscInt i,j,k,total_counts,temp_start_ptr; 1301 /* stuff to store connected components stored in pcbddc->mat_graph */ 1302 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1303 PetscInt n_ISForFaces,n_ISForEdges; 1304 /* near null space stuff */ 1305 MatNullSpace nearnullsp; 1306 const Vec *nearnullvecs; 1307 Vec *localnearnullsp; 1308 PetscBool nnsp_has_cnst; 1309 PetscInt nnsp_size; 1310 PetscScalar *array; 1311 /* BLAS integers */ 1312 PetscBLASInt lwork,lierr; 1313 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1314 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1315 /* LAPACK working arrays for SVD or POD */ 1316 PetscBool skip_lapack; 1317 PetscScalar *work; 1318 PetscReal *singular_vals; 1319 #if defined(PETSC_USE_COMPLEX) 1320 PetscReal *rwork; 1321 #endif 1322 #if defined(PETSC_MISSING_LAPACK_GESVD) 1323 PetscBLASInt Blas_one_2=1; 1324 PetscScalar *temp_basis,*correlation_mat; 1325 #else 1326 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1327 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1328 #endif 1329 /* reuse */ 1330 PetscInt olocal_primal_size; 1331 PetscInt *oprimal_indices_local_idxs; 1332 /* change of basis */ 1333 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1334 PetscBool boolforchange,qr_needed; 1335 PetscBT touched,change_basis,qr_needed_idx; 1336 /* auxiliary stuff */ 1337 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1338 /* some quantities */ 1339 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1340 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1341 1342 1343 PetscFunctionBegin; 1344 /* Destroy Mat objects computed previously */ 1345 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1346 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1347 /* Get index sets for faces, edges and vertices from graph */ 1348 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1349 pcbddc->use_vertices = PETSC_TRUE; 1350 } 1351 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1352 /* HACK: provide functions to set change of basis */ 1353 if (!ISForVertices && pcbddc->NullSpace) { 1354 pcbddc->use_change_of_basis = PETSC_TRUE; 1355 pcbddc->use_change_on_faces = PETSC_FALSE; 1356 } 1357 /* print some info */ 1358 if (pcbddc->dbg_flag) { 1359 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1360 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1361 i = 0; 1362 if (ISForVertices) { 1363 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1364 } 1365 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1366 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1368 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1369 } 1370 /* check if near null space is attached to global mat */ 1371 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1372 if (nearnullsp) { 1373 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1374 /* remove any stored info */ 1375 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1376 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1377 /* store information for BDDC solver reuse */ 1378 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1379 pcbddc->onearnullspace = nearnullsp; 1380 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1381 for (i=0;i<nnsp_size;i++) { 1382 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1383 } 1384 } else { /* if near null space is not provided BDDC uses constants by default */ 1385 nnsp_size = 0; 1386 nnsp_has_cnst = PETSC_TRUE; 1387 } 1388 /* get max number of constraints on a single cc */ 1389 max_constraints = nnsp_size; 1390 if (nnsp_has_cnst) max_constraints++; 1391 1392 /* 1393 Evaluate maximum storage size needed by the procedure 1394 - temp_indices will contain start index of each constraint stored as follows 1395 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1396 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1397 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1398 */ 1399 total_counts = n_ISForFaces+n_ISForEdges; 1400 total_counts *= max_constraints; 1401 n_vertices = 0; 1402 if (ISForVertices) { 1403 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1404 } 1405 total_counts += n_vertices; 1406 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1407 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1408 total_counts = 0; 1409 max_size_of_constraint = 0; 1410 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1411 if (i<n_ISForEdges) { 1412 used_IS = &ISForEdges[i]; 1413 } else { 1414 used_IS = &ISForFaces[i-n_ISForEdges]; 1415 } 1416 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1417 total_counts += j; 1418 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1419 } 1420 total_counts *= max_constraints; 1421 total_counts += n_vertices; 1422 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1423 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1424 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1425 /* get local part of global near null space vectors */ 1426 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1427 for (k=0;k<nnsp_size;k++) { 1428 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1429 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1430 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1431 } 1432 1433 /* whether or not to skip lapack calls */ 1434 skip_lapack = PETSC_TRUE; 1435 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1436 1437 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1438 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1439 PetscScalar temp_work; 1440 #if defined(PETSC_MISSING_LAPACK_GESVD) 1441 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1442 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1443 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1444 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1445 #if defined(PETSC_USE_COMPLEX) 1446 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1447 #endif 1448 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1449 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1450 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1451 lwork = -1; 1452 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1453 #if !defined(PETSC_USE_COMPLEX) 1454 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1455 #else 1456 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1457 #endif 1458 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1459 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1460 #else /* on missing GESVD */ 1461 /* SVD */ 1462 PetscInt max_n,min_n; 1463 max_n = max_size_of_constraint; 1464 min_n = max_constraints; 1465 if (max_size_of_constraint < max_constraints) { 1466 min_n = max_size_of_constraint; 1467 max_n = max_constraints; 1468 } 1469 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1470 #if defined(PETSC_USE_COMPLEX) 1471 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1472 #endif 1473 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1474 lwork = -1; 1475 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1476 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1477 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1478 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1479 #if !defined(PETSC_USE_COMPLEX) 1480 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1481 #else 1482 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1483 #endif 1484 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1485 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1486 #endif /* on missing GESVD */ 1487 /* Allocate optimal workspace */ 1488 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1489 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1490 } 1491 /* Now we can loop on constraining sets */ 1492 total_counts = 0; 1493 temp_indices[0] = 0; 1494 /* vertices */ 1495 if (ISForVertices) { 1496 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1497 if (nnsp_has_cnst) { /* consider all vertices */ 1498 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1499 for (i=0;i<n_vertices;i++) { 1500 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1501 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1502 total_counts++; 1503 } 1504 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1505 PetscBool used_vertex; 1506 for (i=0;i<n_vertices;i++) { 1507 used_vertex = PETSC_FALSE; 1508 k = 0; 1509 while (!used_vertex && k<nnsp_size) { 1510 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1511 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1512 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1513 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1514 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1515 total_counts++; 1516 used_vertex = PETSC_TRUE; 1517 } 1518 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1519 k++; 1520 } 1521 } 1522 } 1523 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1524 n_vertices = total_counts; 1525 } 1526 1527 /* edges and faces */ 1528 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1529 if (i<n_ISForEdges) { 1530 used_IS = &ISForEdges[i]; 1531 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1532 } else { 1533 used_IS = &ISForFaces[i-n_ISForEdges]; 1534 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1535 } 1536 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1537 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1538 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1539 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1540 /* change of basis should not be performed on local periodic nodes */ 1541 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1542 if (nnsp_has_cnst) { 1543 PetscScalar quad_value; 1544 temp_constraints++; 1545 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1546 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1547 for (j=0;j<size_of_constraint;j++) { 1548 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1549 } 1550 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1551 total_counts++; 1552 } 1553 for (k=0;k<nnsp_size;k++) { 1554 PetscReal real_value; 1555 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1556 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1557 for (j=0;j<size_of_constraint;j++) { 1558 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1559 } 1560 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1561 /* check if array is null on the connected component */ 1562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1563 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1564 if (real_value > 0.0) { /* keep indices and values */ 1565 temp_constraints++; 1566 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1567 total_counts++; 1568 } 1569 } 1570 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1571 valid_constraints = temp_constraints; 1572 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1573 if (!pcbddc->use_nnsp_true && temp_constraints) { 1574 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1575 1576 #if defined(PETSC_MISSING_LAPACK_GESVD) 1577 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1578 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1579 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1580 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1581 from that computed using LAPACKgesvd 1582 -> This is due to a different computation of eigenvectors in LAPACKheev 1583 -> The quality of the POD-computed basis will be the same */ 1584 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1585 /* Store upper triangular part of correlation matrix */ 1586 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1587 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1588 for (j=0;j<temp_constraints;j++) { 1589 for (k=0;k<j+1;k++) { 1590 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1591 } 1592 } 1593 /* compute eigenvalues and eigenvectors of correlation matrix */ 1594 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1595 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1596 #if !defined(PETSC_USE_COMPLEX) 1597 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1598 #else 1599 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1600 #endif 1601 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1602 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1603 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1604 j=0; 1605 while (j < temp_constraints && singular_vals[j] < tol) j++; 1606 total_counts=total_counts-j; 1607 valid_constraints = temp_constraints-j; 1608 /* scale and copy POD basis into used quadrature memory */ 1609 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1610 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1611 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1612 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1613 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1614 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1615 if (j<temp_constraints) { 1616 PetscInt ii; 1617 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1618 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1619 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1620 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1621 for (k=0;k<temp_constraints-j;k++) { 1622 for (ii=0;ii<size_of_constraint;ii++) { 1623 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1624 } 1625 } 1626 } 1627 #else /* on missing GESVD */ 1628 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1629 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1630 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1631 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1632 #if !defined(PETSC_USE_COMPLEX) 1633 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1634 #else 1635 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1636 #endif 1637 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1638 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1639 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1640 k = temp_constraints; 1641 if (k > size_of_constraint) k = size_of_constraint; 1642 j = 0; 1643 while (j < k && singular_vals[k-j-1] < tol) j++; 1644 total_counts = total_counts-temp_constraints+k-j; 1645 valid_constraints = k-j; 1646 #endif /* on missing GESVD */ 1647 } 1648 /* setting change_of_basis flag is safe now */ 1649 if (boolforchange) { 1650 for (j=0;j<valid_constraints;j++) { 1651 PetscBTSet(change_basis,total_counts-j-1); 1652 } 1653 } 1654 } 1655 /* free index sets of faces, edges and vertices */ 1656 for (i=0;i<n_ISForFaces;i++) { 1657 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1658 } 1659 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1660 for (i=0;i<n_ISForEdges;i++) { 1661 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1662 } 1663 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1664 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1665 /* map temp_indices_to_constraint in boundary numbering */ 1666 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1667 if (i != temp_indices[total_counts]) { 1668 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1669 } 1670 1671 /* free workspace */ 1672 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1673 ierr = PetscFree(work);CHKERRQ(ierr); 1674 #if defined(PETSC_USE_COMPLEX) 1675 ierr = PetscFree(rwork);CHKERRQ(ierr); 1676 #endif 1677 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1678 #if defined(PETSC_MISSING_LAPACK_GESVD) 1679 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1680 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1681 #endif 1682 } 1683 for (k=0;k<nnsp_size;k++) { 1684 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1685 } 1686 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1687 1688 /* set quantities in pcbddc data structure and store previous primal size */ 1689 /* n_vertices defines the number of subdomain corners in the primal space */ 1690 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1691 olocal_primal_size = pcbddc->local_primal_size; 1692 pcbddc->local_primal_size = total_counts; 1693 pcbddc->n_vertices = n_vertices; 1694 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1695 1696 /* Create constraint matrix */ 1697 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1698 /* so we need to set it up properly either with or without change of basis */ 1699 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1700 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1701 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1702 /* array to compute a local numbering of constraints : vertices first then constraints */ 1703 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1704 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1705 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1706 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1707 /* auxiliary stuff for basis change */ 1708 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1709 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1710 1711 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1712 total_primal_vertices=0; 1713 for (i=0;i<pcbddc->local_primal_size;i++) { 1714 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1715 if (size_of_constraint == 1) { 1716 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1717 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1718 aux_primal_minloc[total_primal_vertices]=0; 1719 total_primal_vertices++; 1720 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1721 PetscInt min_loc,min_index; 1722 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1723 /* find first untouched local node */ 1724 k = 0; 1725 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1726 min_index = global_indices[k]; 1727 min_loc = k; 1728 /* search the minimum among global nodes already untouched on the cc */ 1729 for (k=1;k<size_of_constraint;k++) { 1730 /* there can be more than one constraint on a single connected component */ 1731 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1732 min_index = global_indices[k]; 1733 min_loc = k; 1734 } 1735 } 1736 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1737 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1738 aux_primal_minloc[total_primal_vertices]=min_loc; 1739 total_primal_vertices++; 1740 } 1741 } 1742 /* determine if a QR strategy is needed for change of basis */ 1743 qr_needed = PETSC_FALSE; 1744 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1745 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1746 if (PetscBTLookup(change_basis,i)) { 1747 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1748 j = 0; 1749 for (k=0;k<size_of_constraint;k++) { 1750 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1751 j++; 1752 } 1753 } 1754 /* found more than one primal dof on the cc */ 1755 if (j > 1) { 1756 PetscBTSet(qr_needed_idx,i); 1757 qr_needed = PETSC_TRUE; 1758 } 1759 } 1760 } 1761 /* free workspace */ 1762 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1763 1764 /* permute indices in order to have a sorted set of vertices */ 1765 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1766 1767 /* nonzero structure of constraint matrix */ 1768 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1769 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1770 j=total_primal_vertices; 1771 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1772 if (!PetscBTLookup(change_basis,i)) { 1773 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1774 j++; 1775 } 1776 } 1777 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1778 ierr = PetscFree(nnz);CHKERRQ(ierr); 1779 /* set values in constraint matrix */ 1780 for (i=0;i<total_primal_vertices;i++) { 1781 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1782 } 1783 total_counts = total_primal_vertices; 1784 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1785 if (!PetscBTLookup(change_basis,i)) { 1786 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1787 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1788 total_counts++; 1789 } 1790 } 1791 /* assembling */ 1792 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1793 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1794 /* 1795 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1796 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1797 */ 1798 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1799 if (pcbddc->use_change_of_basis) { 1800 /* dual and primal dofs on a single cc */ 1801 PetscInt dual_dofs,primal_dofs; 1802 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1803 PetscInt primal_counter; 1804 /* working stuff for GEQRF */ 1805 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1806 PetscBLASInt lqr_work; 1807 /* working stuff for UNGQR */ 1808 PetscScalar *gqr_work,lgqr_work_t; 1809 PetscBLASInt lgqr_work; 1810 /* working stuff for TRTRS */ 1811 PetscScalar *trs_rhs; 1812 PetscBLASInt Blas_NRHS; 1813 /* pointers for values insertion into change of basis matrix */ 1814 PetscInt *start_rows,*start_cols; 1815 PetscScalar *start_vals; 1816 /* working stuff for values insertion */ 1817 PetscBT is_primal; 1818 1819 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1820 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1821 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1822 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1823 /* work arrays */ 1824 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1825 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1826 /* nonzeros per row */ 1827 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1828 if (PetscBTLookup(change_basis,i)) { 1829 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1830 if (PetscBTLookup(qr_needed_idx,i)) { 1831 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1832 } else { 1833 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1834 /* get local primal index on the cc */ 1835 j = 0; 1836 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1837 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1838 } 1839 } 1840 } 1841 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1842 ierr = PetscFree(nnz);CHKERRQ(ierr); 1843 /* Set initial identity in the matrix */ 1844 for (i=0;i<pcis->n_B;i++) { 1845 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1846 } 1847 1848 if (pcbddc->dbg_flag) { 1849 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1850 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1851 } 1852 1853 1854 /* Now we loop on the constraints which need a change of basis */ 1855 /* 1856 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1857 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1858 1859 Basic blocks of change of basis matrix T computed 1860 1861 - Using the following block transformation if there is only a primal dof on the cc 1862 (in the example, primal dof is the last one of the edge in LOCAL ordering 1863 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1864 | 1 0 ... 0 1 | 1865 | 0 1 ... 0 1 | 1866 | ... | 1867 | 0 ... 1 1 | 1868 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1869 1870 - via QR decomposition of constraints otherwise 1871 */ 1872 if (qr_needed) { 1873 /* space to store Q */ 1874 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1875 /* first we issue queries for optimal work */ 1876 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1877 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1878 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1879 lqr_work = -1; 1880 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1881 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1882 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1883 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1884 lgqr_work = -1; 1885 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1886 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1887 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1888 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1889 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1890 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1891 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1892 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1893 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1894 /* array to store scaling factors for reflectors */ 1895 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1896 /* array to store rhs and solution of triangular solver */ 1897 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1898 /* allocating workspace for check */ 1899 if (pcbddc->dbg_flag) { 1900 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1901 } 1902 } 1903 /* array to store whether a node is primal or not */ 1904 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1905 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1906 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1907 if (i != total_primal_vertices) { 1908 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1909 } 1910 for (i=0;i<total_primal_vertices;i++) { 1911 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1912 } 1913 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1914 1915 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1916 /* -> using implicit ordering contained in temp_indices data */ 1917 total_counts = pcbddc->n_vertices; 1918 primal_counter = total_counts; 1919 while (total_counts<pcbddc->local_primal_size) { 1920 primal_dofs = 1; 1921 if (PetscBTLookup(change_basis,total_counts)) { 1922 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1923 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1924 primal_dofs++; 1925 } 1926 /* get constraint info */ 1927 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1928 dual_dofs = size_of_constraint-primal_dofs; 1929 1930 if (pcbddc->dbg_flag) { 1931 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1932 } 1933 1934 if (primal_dofs > 1) { /* QR */ 1935 1936 /* copy quadrature constraints for change of basis check */ 1937 if (pcbddc->dbg_flag) { 1938 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1939 } 1940 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1941 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1942 1943 /* compute QR decomposition of constraints */ 1944 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1945 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1946 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1947 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1948 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1949 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1950 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1951 1952 /* explictly compute R^-T */ 1953 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1954 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1955 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1956 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1957 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1958 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1959 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1960 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1961 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1962 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1963 1964 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1965 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1966 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1967 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1968 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1969 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1970 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1971 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1972 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1973 1974 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1975 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1976 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1977 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1978 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1979 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1980 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1981 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1982 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1983 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1984 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1985 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1986 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1987 1988 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1989 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1990 /* insert cols for primal dofs */ 1991 for (j=0;j<primal_dofs;j++) { 1992 start_vals = &qr_basis[j*size_of_constraint]; 1993 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1994 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1995 } 1996 /* insert cols for dual dofs */ 1997 for (j=0,k=0;j<dual_dofs;k++) { 1998 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1999 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2000 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2001 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2002 j++; 2003 } 2004 } 2005 2006 /* check change of basis */ 2007 if (pcbddc->dbg_flag) { 2008 PetscInt ii,jj; 2009 PetscBool valid_qr=PETSC_TRUE; 2010 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2011 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2012 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2013 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2014 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2015 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2016 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2017 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2018 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2019 for (jj=0;jj<size_of_constraint;jj++) { 2020 for (ii=0;ii<primal_dofs;ii++) { 2021 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2022 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2023 } 2024 } 2025 if (!valid_qr) { 2026 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2027 for (jj=0;jj<size_of_constraint;jj++) { 2028 for (ii=0;ii<primal_dofs;ii++) { 2029 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2030 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2031 } 2032 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2033 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2034 } 2035 } 2036 } 2037 } else { 2038 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2039 } 2040 } 2041 } else { /* simple transformation block */ 2042 PetscInt row,col; 2043 PetscScalar val; 2044 for (j=0;j<size_of_constraint;j++) { 2045 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2046 if (!PetscBTLookup(is_primal,row)) { 2047 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2048 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2049 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2050 } else { 2051 for (k=0;k<size_of_constraint;k++) { 2052 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2053 if (row != col) { 2054 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2055 } else { 2056 val = 1.0; 2057 } 2058 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2059 } 2060 } 2061 } 2062 } 2063 /* increment primal counter */ 2064 primal_counter += primal_dofs; 2065 } else { 2066 if (pcbddc->dbg_flag) { 2067 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2068 } 2069 } 2070 /* increment constraint counter total_counts */ 2071 total_counts += primal_dofs; 2072 } 2073 2074 /* free workspace */ 2075 if (qr_needed) { 2076 if (pcbddc->dbg_flag) { 2077 ierr = PetscFree(work);CHKERRQ(ierr); 2078 } 2079 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2080 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2081 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2082 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2083 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2084 } 2085 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2086 /* assembling */ 2087 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2089 /* 2090 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2091 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2092 */ 2093 } 2094 2095 /* get indices in local ordering for vertices and constraints */ 2096 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2097 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2098 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2099 } 2100 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2101 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2102 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2103 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2104 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2105 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2106 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2107 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2108 /* set quantities in PCBDDC data struct */ 2109 pcbddc->n_actual_vertices = i; 2110 /* check if a new primal space has been introduced */ 2111 pcbddc->new_primal_space_local = PETSC_TRUE; 2112 if (olocal_primal_size == pcbddc->local_primal_size) { 2113 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2114 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2115 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2116 } 2117 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2118 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2119 2120 /* flush dbg viewer */ 2121 if (pcbddc->dbg_flag) { 2122 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2123 } 2124 2125 /* free workspace */ 2126 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2127 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2128 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2129 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2130 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2131 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2132 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2133 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2139 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2140 { 2141 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2142 PC_IS *pcis = (PC_IS*)pc->data; 2143 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2144 PetscInt ierr,i,vertex_size; 2145 PetscViewer viewer=pcbddc->dbg_viewer; 2146 2147 PetscFunctionBegin; 2148 /* Reset previously computed graph */ 2149 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2150 /* Init local Graph struct */ 2151 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2152 2153 /* Check validity of the csr graph passed in by the user */ 2154 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2155 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2156 } 2157 2158 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2159 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2160 Mat mat_adj; 2161 const PetscInt *xadj,*adjncy; 2162 PetscBool flg_row=PETSC_TRUE; 2163 2164 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2165 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2166 if (!flg_row) { 2167 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2168 } 2169 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2170 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2171 if (!flg_row) { 2172 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2173 } 2174 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2175 } 2176 2177 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2178 vertex_size = 1; 2179 if (pcbddc->user_provided_isfordofs) { 2180 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2181 ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2182 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2183 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2184 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2185 } 2186 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2187 pcbddc->n_ISForDofs = 0; 2188 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2189 } 2190 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2191 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2192 } else { 2193 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2194 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2195 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2196 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2197 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2198 } 2199 } 2200 } 2201 2202 /* Setup of Graph */ 2203 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2204 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2205 } 2206 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2207 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2208 } 2209 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2210 2211 /* Graph's connected components analysis */ 2212 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2213 2214 /* print some info to stdout */ 2215 if (pcbddc->dbg_flag) { 2216 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2217 } 2218 2219 /* mark topography has done */ 2220 pcbddc->recompute_topography = PETSC_FALSE; 2221 PetscFunctionReturn(0); 2222 } 2223 2224 #undef __FUNCT__ 2225 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2226 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2227 { 2228 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2229 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2230 PetscErrorCode ierr; 2231 2232 PetscFunctionBegin; 2233 n = 0; 2234 vertices = 0; 2235 if (pcbddc->ConstraintMatrix) { 2236 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2237 for (i=0;i<local_primal_size;i++) { 2238 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2239 if (size_of_constraint == 1) n++; 2240 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2241 } 2242 if (vertices_idx) { 2243 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2244 n = 0; 2245 for (i=0;i<local_primal_size;i++) { 2246 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2247 if (size_of_constraint == 1) { 2248 vertices[n++]=row_cmat_indices[0]; 2249 } 2250 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2251 } 2252 } 2253 } 2254 *n_vertices = n; 2255 if (vertices_idx) *vertices_idx = vertices; 2256 PetscFunctionReturn(0); 2257 } 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2261 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2262 { 2263 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2264 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2265 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2266 PetscBT touched; 2267 PetscErrorCode ierr; 2268 2269 /* This function assumes that the number of local constraints per connected component 2270 is not greater than the number of nodes defined for the connected component 2271 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2272 PetscFunctionBegin; 2273 n = 0; 2274 constraints_index = 0; 2275 if (pcbddc->ConstraintMatrix) { 2276 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2277 max_size_of_constraint = 0; 2278 for (i=0;i<local_primal_size;i++) { 2279 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2280 if (size_of_constraint > 1) { 2281 n++; 2282 } 2283 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2284 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2285 } 2286 if (constraints_idx) { 2287 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2288 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2289 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2290 n = 0; 2291 for (i=0;i<local_primal_size;i++) { 2292 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2293 if (size_of_constraint > 1) { 2294 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2295 /* find first untouched local node */ 2296 j = 0; 2297 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2298 min_index = row_cmat_global_indices[j]; 2299 min_loc = j; 2300 /* search the minimum among nodes not yet touched on the connected component 2301 since there can be more than one constraint on a single cc */ 2302 for (j=1;j<size_of_constraint;j++) { 2303 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2304 min_index = row_cmat_global_indices[j]; 2305 min_loc = j; 2306 } 2307 } 2308 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2309 constraints_index[n++] = row_cmat_indices[min_loc]; 2310 } 2311 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2312 } 2313 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2314 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2315 } 2316 } 2317 *n_constraints = n; 2318 if (constraints_idx) *constraints_idx = constraints_index; 2319 PetscFunctionReturn(0); 2320 } 2321 2322 /* the next two functions has been adapted from pcis.c */ 2323 #undef __FUNCT__ 2324 #define __FUNCT__ "PCBDDCApplySchur" 2325 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2326 { 2327 PetscErrorCode ierr; 2328 PC_IS *pcis = (PC_IS*)(pc->data); 2329 2330 PetscFunctionBegin; 2331 if (!vec2_B) { vec2_B = v; } 2332 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2333 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2334 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2335 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2336 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2342 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2343 { 2344 PetscErrorCode ierr; 2345 PC_IS *pcis = (PC_IS*)(pc->data); 2346 2347 PetscFunctionBegin; 2348 if (!vec2_B) { vec2_B = v; } 2349 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2350 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2351 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2352 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2353 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2354 PetscFunctionReturn(0); 2355 } 2356 2357 #undef __FUNCT__ 2358 #define __FUNCT__ "PCBDDCSubsetNumbering" 2359 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2360 { 2361 Vec local_vec,global_vec; 2362 IS seqis,paris; 2363 VecScatter scatter_ctx; 2364 PetscScalar *array; 2365 PetscInt *temp_global_dofs; 2366 PetscScalar globalsum; 2367 PetscInt i,j,s; 2368 PetscInt nlocals,first_index,old_index,max_local; 2369 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2370 PetscMPIInt *dof_sizes,*dof_displs; 2371 PetscBool first_found; 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 /* mpi buffers */ 2376 MPI_Comm_size(comm,&size_prec_comm); 2377 MPI_Comm_rank(comm,&rank_prec_comm); 2378 j = ( !rank_prec_comm ? size_prec_comm : 0); 2379 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2380 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2381 /* get maximum size of subset */ 2382 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2383 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2384 max_local = 0; 2385 if (n_local_dofs) { 2386 max_local = temp_global_dofs[0]; 2387 for (i=1;i<n_local_dofs;i++) { 2388 if (max_local < temp_global_dofs[i] ) { 2389 max_local = temp_global_dofs[i]; 2390 } 2391 } 2392 } 2393 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2394 max_global++; 2395 max_local = 0; 2396 if (n_local_dofs) { 2397 max_local = local_dofs[0]; 2398 for (i=1;i<n_local_dofs;i++) { 2399 if (max_local < local_dofs[i] ) { 2400 max_local = local_dofs[i]; 2401 } 2402 } 2403 } 2404 max_local++; 2405 /* allocate workspace */ 2406 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2407 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2408 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2409 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2410 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2411 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2412 /* create scatter */ 2413 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2414 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2415 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2416 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2417 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2418 /* init array */ 2419 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2420 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2421 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2422 if (local_dofs_mult) { 2423 for (i=0;i<n_local_dofs;i++) { 2424 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2425 } 2426 } else { 2427 for (i=0;i<n_local_dofs;i++) { 2428 array[local_dofs[i]]=1.0; 2429 } 2430 } 2431 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2432 /* scatter into global vec and get total number of global dofs */ 2433 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2434 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2435 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2436 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2437 /* Fill global_vec with cumulative function for global numbering */ 2438 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2439 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2440 nlocals = 0; 2441 first_index = -1; 2442 first_found = PETSC_FALSE; 2443 for (i=0;i<s;i++) { 2444 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2445 first_found = PETSC_TRUE; 2446 first_index = i; 2447 } 2448 nlocals += (PetscInt)PetscRealPart(array[i]); 2449 } 2450 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2451 if (!rank_prec_comm) { 2452 dof_displs[0]=0; 2453 for (i=1;i<size_prec_comm;i++) { 2454 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2455 } 2456 } 2457 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2458 if (first_found) { 2459 array[first_index] += (PetscScalar)nlocals; 2460 old_index = first_index; 2461 for (i=first_index+1;i<s;i++) { 2462 if (PetscRealPart(array[i]) > 0.0) { 2463 array[i] += array[old_index]; 2464 old_index = i; 2465 } 2466 } 2467 } 2468 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2469 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2470 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2471 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2472 /* get global ordering of local dofs */ 2473 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2474 if (local_dofs_mult) { 2475 for (i=0;i<n_local_dofs;i++) { 2476 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2477 } 2478 } else { 2479 for (i=0;i<n_local_dofs;i++) { 2480 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2481 } 2482 } 2483 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2484 /* free workspace */ 2485 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2486 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2487 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2488 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2489 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2490 /* return pointer to global ordering of local dofs */ 2491 *global_numbering_subset = temp_global_dofs; 2492 PetscFunctionReturn(0); 2493 } 2494 2495 #undef __FUNCT__ 2496 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2497 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2498 { 2499 PetscInt i,j; 2500 PetscScalar *alphas; 2501 PetscErrorCode ierr; 2502 2503 PetscFunctionBegin; 2504 /* this implements stabilized Gram-Schmidt */ 2505 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2506 for (i=0;i<n;i++) { 2507 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2508 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2509 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2510 } 2511 ierr = PetscFree(alphas);CHKERRQ(ierr); 2512 PetscFunctionReturn(0); 2513 } 2514 2515 /* TODO 2516 - now preallocation is done assuming SEQDENSE local matrices 2517 */ 2518 #undef __FUNCT__ 2519 #define __FUNCT__ "MatISGetMPIXAIJ" 2520 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2521 { 2522 Mat new_mat; 2523 Mat_IS *matis = (Mat_IS*)(mat->data); 2524 /* info on mat */ 2525 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2526 PetscInt bs,rows,cols; 2527 PetscInt lrows,lcols; 2528 PetscInt local_rows,local_cols; 2529 PetscBool isdense; 2530 /* values insertion */ 2531 PetscScalar *array; 2532 PetscInt *local_indices,*global_indices; 2533 /* work */ 2534 PetscInt i,j,index_row; 2535 PetscErrorCode ierr; 2536 2537 PetscFunctionBegin; 2538 /* MISSING CHECKS 2539 - rectangular case not covered (it is not allowed by MATIS) 2540 */ 2541 /* get info from mat */ 2542 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2543 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2544 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2545 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2546 2547 /* work */ 2548 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2549 for (i=0;i<local_rows;i++) local_indices[i]=i; 2550 /* map indices of local mat to global values */ 2551 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2552 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2553 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2554 2555 if (reuse==MAT_INITIAL_MATRIX) { 2556 Vec vec_dnz,vec_onz; 2557 PetscScalar *my_dnz,*my_onz; 2558 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2559 PetscInt index_col,owner; 2560 PetscMPIInt nsubdomains; 2561 2562 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2563 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2564 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2565 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2566 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2567 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2568 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2569 2570 /* 2571 preallocation 2572 */ 2573 2574 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2575 /* 2576 Some vectors are needed to sum up properly on shared interface dofs. 2577 Preallocation macros cannot do the job. 2578 Note that preallocation is not exact, since it overestimates nonzeros 2579 */ 2580 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2581 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2582 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2583 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2584 /* All processes need to compute entire row ownership */ 2585 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2586 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2587 for (i=0;i<nsubdomains;i++) { 2588 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2589 row_ownership[j]=i; 2590 } 2591 } 2592 2593 /* 2594 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2595 then, they will be summed up properly. This way, preallocation is always sufficient 2596 */ 2597 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2598 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2599 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2600 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2601 for (i=0;i<local_rows;i++) { 2602 index_row = global_indices[i]; 2603 for (j=i;j<local_rows;j++) { 2604 owner = row_ownership[index_row]; 2605 index_col = global_indices[j]; 2606 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2607 my_dnz[i] += 1.0; 2608 } else { /* offdiag block */ 2609 my_onz[i] += 1.0; 2610 } 2611 /* same as before, interchanging rows and cols */ 2612 if (i != j) { 2613 owner = row_ownership[index_col]; 2614 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2615 my_dnz[j] += 1.0; 2616 } else { 2617 my_onz[j] += 1.0; 2618 } 2619 } 2620 } 2621 } 2622 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2623 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2624 if (local_rows) { /* multilevel guard */ 2625 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2626 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2627 } 2628 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2629 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2630 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2631 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2632 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2633 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2634 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2635 2636 /* set computed preallocation in dnz and onz */ 2637 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2638 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2639 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2640 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2641 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2642 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2643 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2644 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2645 2646 /* Resize preallocation if overestimated */ 2647 for (i=0;i<lrows;i++) { 2648 dnz[i] = PetscMin(dnz[i],lcols); 2649 onz[i] = PetscMin(onz[i],cols-lcols); 2650 } 2651 /* set preallocation */ 2652 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2653 for (i=0;i<lrows/bs;i++) { 2654 dnz[i] = dnz[i*bs]/bs; 2655 onz[i] = onz[i*bs]/bs; 2656 } 2657 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2658 for (i=0;i<lrows/bs;i++) { 2659 dnz[i] = dnz[i]-i; 2660 } 2661 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2662 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2663 *M = new_mat; 2664 } else { 2665 PetscInt mbs,mrows,mcols; 2666 /* some checks */ 2667 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2668 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2669 if (mrows != rows) { 2670 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2671 } 2672 if (mrows != rows) { 2673 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2674 } 2675 if (mbs != bs) { 2676 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2677 } 2678 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2679 } 2680 /* set local to global mappings */ 2681 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2682 /* Set values */ 2683 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2684 if (isdense) { /* special case for dense local matrices */ 2685 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2686 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2687 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2688 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2689 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2690 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2691 } else { /* very basic values insertion for all other matrix types */ 2692 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2693 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2694 for (i=0;i<local_rows;i++) { 2695 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2696 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2697 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2698 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2699 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2700 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2701 } 2702 } 2703 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2704 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2705 if (isdense) { 2706 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2707 } 2708 PetscFunctionReturn(0); 2709 } 2710 2711 #undef __FUNCT__ 2712 #define __FUNCT__ "MatISGetSubassemblingPattern" 2713 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2714 { 2715 Mat subdomain_adj; 2716 IS new_ranks,ranks_send_to; 2717 MatPartitioning partitioner; 2718 Mat_IS *matis; 2719 PetscInt n_neighs,*neighs,*n_shared,**shared; 2720 PetscInt prank; 2721 PetscMPIInt size,rank,color; 2722 PetscInt *xadj,*adjncy,*oldranks; 2723 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2724 PetscInt i,j,n_subdomains,local_size,threshold=0; 2725 PetscErrorCode ierr; 2726 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2727 PetscSubcomm subcomm; 2728 2729 PetscFunctionBegin; 2730 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2731 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2732 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2733 2734 /* Get info on mapping */ 2735 matis = (Mat_IS*)(mat->data); 2736 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2737 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2738 2739 /* build local CSR graph of subdomains' connectivity */ 2740 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2741 xadj[0] = 0; 2742 xadj[1] = PetscMax(n_neighs-1,0); 2743 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2744 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2745 2746 if (threshold) { 2747 PetscInt* count,min_threshold; 2748 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2749 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2750 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2751 for (j=0;j<n_shared[i];j++) { 2752 count[shared[i][j]] += 1; 2753 } 2754 } 2755 /* adapt threshold since we dont want to lose significant connections */ 2756 min_threshold = n_neighs; 2757 for (i=1;i<n_neighs;i++) { 2758 for (j=0;j<n_shared[i];j++) { 2759 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2760 } 2761 } 2762 threshold = PetscMax(min_threshold+1,threshold); 2763 xadj[1] = 0; 2764 for (i=1;i<n_neighs;i++) { 2765 for (j=0;j<n_shared[i];j++) { 2766 if (count[shared[i][j]] < threshold) { 2767 adjncy[xadj[1]] = neighs[i]; 2768 adjncy_wgt[xadj[1]] = n_shared[i]; 2769 xadj[1]++; 2770 break; 2771 } 2772 } 2773 } 2774 ierr = PetscFree(count);CHKERRQ(ierr); 2775 } else { 2776 if (xadj[1]) { 2777 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2778 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2779 } 2780 } 2781 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2782 if (use_square) { 2783 for (i=0;i<xadj[1];i++) { 2784 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2785 } 2786 } 2787 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2788 2789 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2790 2791 /* 2792 Restrict work on active processes only. 2793 */ 2794 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2795 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2796 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2797 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2798 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2799 if (color) { 2800 ierr = PetscFree(xadj);CHKERRQ(ierr); 2801 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2802 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2803 } else { 2804 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2805 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2806 prank = rank; 2807 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2808 /* 2809 for (i=0;i<size;i++) { 2810 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2811 } 2812 */ 2813 for (i=0;i<xadj[1];i++) { 2814 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2815 } 2816 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2817 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2818 n_subdomains = (PetscInt)size; 2819 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2820 2821 /* Partition */ 2822 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2823 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2824 if (use_vwgt) { 2825 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2826 v_wgt[0] = local_size; 2827 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2828 } 2829 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); */ 2830 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2831 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2832 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2833 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2834 2835 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2836 /* ranks_send_to_idx[0] = oldranks[is_indices[0]]; */ /* contiguos set of processes */ 2837 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2838 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2839 /* clean up */ 2840 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2841 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2842 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2843 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2844 } 2845 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2846 2847 /* assemble parallel IS for sends */ 2848 i = 1; 2849 if (color) i=0; 2850 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2851 2852 /* get back IS */ 2853 *is_sends = ranks_send_to; 2854 PetscFunctionReturn(0); 2855 } 2856 2857 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2858 2859 #undef __FUNCT__ 2860 #define __FUNCT__ "MatISSubassemble" 2861 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, MatReuse reuse, Mat *mat_n) 2862 { 2863 Mat local_mat; 2864 Mat_IS *matis; 2865 IS is_sends_internal; 2866 PetscInt rows,cols; 2867 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2868 PetscBool ismatis,isdense; 2869 ISLocalToGlobalMapping l2gmap; 2870 PetscInt* l2gmap_indices; 2871 const PetscInt* is_indices; 2872 MatType new_local_type; 2873 MatTypePrivate new_local_type_private; 2874 /* buffers */ 2875 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2876 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2877 /* MPI */ 2878 MPI_Comm comm; 2879 PetscMPIInt n_sends,n_recvs,commsize; 2880 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2881 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2882 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2883 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2884 PetscErrorCode ierr; 2885 2886 PetscFunctionBegin; 2887 /* checks */ 2888 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2889 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2890 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2891 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2892 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2893 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2894 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2895 if (reuse == MAT_REUSE_MATRIX) { 2896 PetscInt mrows,mcols,mnrows,mncols; 2897 PetscBool ismatis; 2898 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2899 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2900 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2901 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2902 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2903 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2904 } 2905 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2906 PetscValidLogicalCollectiveInt(mat,bs,0); 2907 /* prepare IS for sending if not provided */ 2908 if (!is_sends) { 2909 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2910 ierr = MatISGetSubassemblingPattern(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2911 } else { 2912 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2913 is_sends_internal = is_sends; 2914 } 2915 2916 /* get pointer of MATIS data */ 2917 matis = (Mat_IS*)mat->data; 2918 2919 /* get comm */ 2920 comm = PetscObjectComm((PetscObject)mat); 2921 2922 /* compute number of sends */ 2923 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2924 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2925 2926 /* compute number of receives */ 2927 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2928 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2929 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2930 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2931 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2932 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2933 ierr = PetscFree(iflags);CHKERRQ(ierr); 2934 2935 /* prepare send/receive buffers */ 2936 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2937 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2938 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2939 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2940 2941 /* Get data from local mat */ 2942 if (!isdense) { 2943 /* TODO: See below some guidelines on how to prepare the local buffers */ 2944 /* 2945 send_buffer_vals should contain the raw values of the local matrix 2946 send_buffer_idxs should contain: 2947 - MatType_PRIVATE type 2948 - PetscInt size_of_l2gmap 2949 - PetscInt global_row_indices[size_of_l2gmap] 2950 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2951 */ 2952 } else { 2953 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2954 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2955 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2956 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2957 send_buffer_idxs[1] = i; 2958 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2959 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2960 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2961 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2962 for (i=0;i<n_sends;i++) { 2963 ilengths_vals[is_indices[i]] = len*len; 2964 ilengths_idxs[is_indices[i]] = len+2; 2965 } 2966 } 2967 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2968 buf_size_idxs = 0; 2969 buf_size_vals = 0; 2970 for (i=0;i<n_recvs;i++) { 2971 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2972 buf_size_vals += (PetscInt)olengths_vals[i]; 2973 } 2974 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2975 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2976 2977 /* get new tags for clean communications */ 2978 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2979 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2980 2981 /* allocate for requests */ 2982 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2983 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2984 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2985 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2986 2987 /* communications */ 2988 ptr_idxs = recv_buffer_idxs; 2989 ptr_vals = recv_buffer_vals; 2990 for (i=0;i<n_recvs;i++) { 2991 source_dest = onodes[i]; 2992 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2993 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2994 ptr_idxs += olengths_idxs[i]; 2995 ptr_vals += olengths_vals[i]; 2996 } 2997 for (i=0;i<n_sends;i++) { 2998 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2999 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3000 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3001 } 3002 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3003 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3004 3005 /* assemble new l2g map */ 3006 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3007 ptr_idxs = recv_buffer_idxs; 3008 buf_size_idxs = 0; 3009 for (i=0;i<n_recvs;i++) { 3010 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3011 ptr_idxs += olengths_idxs[i]; 3012 } 3013 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 3014 ptr_idxs = recv_buffer_idxs; 3015 buf_size_idxs = 0; 3016 for (i=0;i<n_recvs;i++) { 3017 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3018 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3019 ptr_idxs += olengths_idxs[i]; 3020 } 3021 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3022 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3023 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3024 3025 /* infer new local matrix type from received local matrices type */ 3026 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3027 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3028 new_local_type_private = MATAIJ_PRIVATE; 3029 new_local_type = MATSEQAIJ; 3030 if (n_recvs) { 3031 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3032 ptr_idxs = recv_buffer_idxs; 3033 for (i=0;i<n_recvs;i++) { 3034 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3035 new_local_type_private = MATAIJ_PRIVATE; 3036 break; 3037 } 3038 ptr_idxs += olengths_idxs[i]; 3039 } 3040 switch (new_local_type_private) { 3041 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 3042 new_local_type = MATSEQAIJ; 3043 bs = 1; 3044 break; 3045 case MATAIJ_PRIVATE: 3046 new_local_type = MATSEQAIJ; 3047 bs = 1; 3048 break; 3049 case MATBAIJ_PRIVATE: 3050 new_local_type = MATSEQBAIJ; 3051 break; 3052 case MATSBAIJ_PRIVATE: 3053 new_local_type = MATSEQSBAIJ; 3054 break; 3055 default: 3056 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3057 break; 3058 } 3059 } 3060 3061 /* create MATIS object if needed */ 3062 if (reuse == MAT_INITIAL_MATRIX) { 3063 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3064 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3065 } else { 3066 /* it also destroys the local matrices */ 3067 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3068 } 3069 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3070 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3071 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3072 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3073 3074 /* set values */ 3075 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3076 ptr_vals = recv_buffer_vals; 3077 ptr_idxs = recv_buffer_idxs; 3078 for (i=0;i<n_recvs;i++) { 3079 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3080 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3081 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3082 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3083 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3084 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3085 } 3086 ptr_idxs += olengths_idxs[i]; 3087 ptr_vals += olengths_vals[i]; 3088 } 3089 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3090 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3091 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3092 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3093 3094 { /* check */ 3095 Vec lvec,rvec; 3096 PetscReal infty_error; 3097 3098 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3099 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3100 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3101 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3102 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3103 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3104 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3105 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3106 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3107 } 3108 3109 /* free workspace */ 3110 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3111 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3112 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3113 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3114 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3115 if (isdense) { 3116 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3117 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3118 } else { 3119 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3120 } 3121 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3122 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3123 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3124 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3125 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3126 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3127 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3128 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3129 ierr = PetscFree(onodes);CHKERRQ(ierr); 3130 PetscFunctionReturn(0); 3131 } 3132 3133 #undef __FUNCT__ 3134 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3135 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3136 { 3137 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3138 PC_IS *pcis = (PC_IS*)pc->data; 3139 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3140 MatNullSpace CoarseNullSpace=NULL; 3141 ISLocalToGlobalMapping coarse_islg; 3142 IS coarse_is; 3143 PetscInt max_it; 3144 PetscInt im_active=-1,active_procs=-1; 3145 PC pc_temp; 3146 PCType coarse_pc_type; 3147 KSPType coarse_ksp_type; 3148 PetscBool multilevel_requested,multilevel_allowed; 3149 PetscBool setsym,issym,isherm,ispreonly,isbddc,isnn,coarse_reuse; 3150 MatStructure matstruct; 3151 PetscErrorCode ierr; 3152 3153 PetscFunctionBegin; 3154 /* Assign global numbering to coarse dofs */ 3155 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3156 PetscInt ocoarse_size; 3157 ocoarse_size = pcbddc->coarse_size; 3158 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3159 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3160 /* see if we can avoid some work */ 3161 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3162 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3163 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3164 coarse_reuse = PETSC_FALSE; 3165 } else { /* we can safely reuse already computed coarse matrix */ 3166 coarse_reuse = PETSC_TRUE; 3167 } 3168 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3169 coarse_reuse = PETSC_FALSE; 3170 } 3171 /* reset any subassembling information */ 3172 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3173 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3174 coarse_reuse = PETSC_TRUE; 3175 } 3176 3177 /* test if we can go multilevel */ 3178 multilevel_allowed = PETSC_FALSE; 3179 multilevel_requested = PETSC_FALSE; 3180 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3181 if (multilevel_requested) { 3182 /* count "active processes" */ 3183 im_active = !!(pcis->n); 3184 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3185 if (active_procs/pcbddc->coarsening_ratio < 2) { 3186 multilevel_allowed = PETSC_FALSE; 3187 } else { 3188 multilevel_allowed = PETSC_TRUE; 3189 } 3190 } 3191 3192 /* set defaults for coarse KSP and PC */ 3193 if (multilevel_allowed) { 3194 coarse_ksp_type = KSPRICHARDSON; 3195 coarse_pc_type = PCBDDC; 3196 } else { 3197 coarse_ksp_type = KSPPREONLY; 3198 coarse_pc_type = PCREDUNDANT; 3199 } 3200 3201 /* create the coarse KSP object only once with defaults */ 3202 if (!pcbddc->coarse_ksp) { 3203 char prefix[256],str_level[3]; 3204 size_t len; 3205 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3206 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3207 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3208 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3209 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3210 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3211 /* prefix */ 3212 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3213 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3214 if (!pcbddc->current_level) { 3215 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3216 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3217 } else { 3218 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3219 if (pcbddc->current_level>1) len -= 2; 3220 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3221 *(prefix+len)='\0'; 3222 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3223 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3224 } 3225 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3226 } 3227 /* allow user customization */ 3228 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3229 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3230 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3231 3232 /* get some info after set from options */ 3233 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3234 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3235 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3236 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3237 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3238 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3239 isbddc = PETSC_FALSE; 3240 } 3241 3242 /* creates temporary l2gmap and IS for coarse indexes */ 3243 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3244 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3245 3246 /* propagate BDDC info to the next level */ 3247 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3248 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3249 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3250 if (isbddc) { /* protects from unneded computations */ 3251 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3252 const PetscInt *idxs; 3253 IS *c_isfordofs,c_isneumann; 3254 ISLocalToGlobalMapping tmap; 3255 3256 /* create map between primal indices (in local representative ordering) and local subdomain numbering */ 3257 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3258 /* allocate space for temporary storage */ 3259 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3260 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3261 /* dofs splitting */ 3262 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&c_isfordofs);CHKERRQ(ierr); 3263 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3264 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3265 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3266 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3267 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3268 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3269 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3270 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&c_isfordofs[i]);CHKERRQ(ierr); 3271 /* ierr = ISView(c_isfordofs[i],0);CHKERRQ(ierr); */ 3272 } 3273 ierr = PCBDDCSetDofsSplitting(pc_temp,pcbddc->n_ISForDofsLocal,c_isfordofs);CHKERRQ(ierr); 3274 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3275 ierr = ISDestroy(&c_isfordofs[i]);CHKERRQ(ierr); 3276 } 3277 ierr = PetscFree(c_isfordofs);CHKERRQ(ierr); 3278 /* neumann boundaries */ 3279 if (pcbddc->NeumannBoundariesLocal) { 3280 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3281 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3282 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3283 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3284 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3285 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3286 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&c_isneumann);CHKERRQ(ierr); 3287 /* ierr = ISView(c_isneumann,0);CHKERRQ(ierr); */ 3288 ierr = PCBDDCSetNeumannBoundaries(pc_temp,c_isneumann);CHKERRQ(ierr); 3289 } 3290 ierr = ISDestroy(&c_isneumann);CHKERRQ(ierr); 3291 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3292 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3293 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3294 } 3295 3296 /* creates temporary MATIS object for coarse matrix */ 3297 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3298 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3299 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3300 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3302 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3303 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3304 3305 /* assemble coarse matrix */ 3306 if (isbddc || isnn) { 3307 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3308 ierr = MatISGetSubassemblingPattern(coarse_mat_is,pcbddc->coarsening_ratio,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3309 if (pcbddc->dbg_flag) { 3310 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3311 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3312 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 3313 } 3314 } 3315 if (coarse_reuse) { 3316 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3317 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3318 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3319 } else { 3320 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3321 } 3322 } else { 3323 if (coarse_reuse) { 3324 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3325 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3326 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3327 } else { 3328 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3329 } 3330 } 3331 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3332 3333 /* create local to global scatters for coarse problem */ 3334 if (pcbddc->new_primal_space) { 3335 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3336 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3337 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3338 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3339 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3340 } 3341 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3342 3343 /* propagate symmetry info to coarse matrix */ 3344 issym = PETSC_FALSE; 3345 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3346 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3347 isherm = PETSC_FALSE; 3348 ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr); 3349 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3350 3351 /* Compute coarse null space (special handling by BDDC only) */ 3352 if (pcbddc->NullSpace) { 3353 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3354 if (isbddc) { 3355 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3356 } else { 3357 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3358 } 3359 } 3360 3361 /* set operators */ 3362 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3363 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3364 3365 /* additional KSP customization */ 3366 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3367 if (max_it < 5) { 3368 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3369 } 3370 3371 /* print some info if requested */ 3372 if (pcbddc->dbg_flag) { 3373 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3374 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3375 if (!multilevel_allowed) { 3376 if (multilevel_requested) { 3377 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3378 } else if (pcbddc->max_levels) { 3379 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3380 } 3381 } 3382 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3383 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3384 } 3385 3386 /* setup coarse ksp */ 3387 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3388 3389 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3390 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3391 if (pcbddc->dbg_flag || !ispreonly) { 3392 KSP check_ksp; 3393 KSPType check_ksp_type; 3394 PC check_pc; 3395 Vec check_vec; 3396 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3397 PetscInt its; 3398 PetscBool compute; 3399 3400 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3401 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3402 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3403 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3404 if (ispreonly) { 3405 check_ksp_type = KSPPREONLY; 3406 compute = PETSC_FALSE; 3407 } else { 3408 check_ksp_type = KSPGMRES; 3409 compute = PETSC_TRUE; 3410 } 3411 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3412 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3413 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3414 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3415 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3416 /* create random vec */ 3417 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3418 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3419 if (CoarseNullSpace) { 3420 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3421 } 3422 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3423 /* solve coarse problem */ 3424 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3425 if (CoarseNullSpace) { 3426 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3427 } 3428 /* set eigenvalue estimation if preonly has not been requested */ 3429 if (!ispreonly) { 3430 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3431 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Coarse problem eigenvalues: %1.6e %1.6e\n",lambda_min,lambda_max);CHKERRQ(ierr); */ 3432 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3433 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3434 } 3435 /* check coarse problem residual error */ 3436 if (pcbddc->dbg_flag) { 3437 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3438 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3439 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3440 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3441 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3442 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3443 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3444 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3445 if (!ispreonly) { 3446 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3447 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3448 } 3449 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3450 } 3451 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3452 } 3453 3454 /* print additional info */ 3455 if (pcbddc->dbg_flag) { 3456 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3457 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3458 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3459 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3460 } 3461 3462 /* free memory */ 3463 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3464 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3465 PetscFunctionReturn(0); 3466 } 3467 3468 #undef __FUNCT__ 3469 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3470 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3471 { 3472 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3473 PC_IS* pcis = (PC_IS*)pc->data; 3474 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3475 PetscInt i,coarse_size; 3476 PetscInt *local_primal_indices; 3477 PetscErrorCode ierr; 3478 3479 PetscFunctionBegin; 3480 /* Compute global number of coarse dofs */ 3481 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3482 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3483 } 3484 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3485 3486 /* check numbering */ 3487 if (pcbddc->dbg_flag) { 3488 PetscScalar coarsesum,*array; 3489 3490 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3491 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3492 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3493 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3494 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3495 for (i=0;i<pcbddc->local_primal_size;i++) { 3496 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3497 } 3498 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3499 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3500 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3501 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3502 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3503 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3504 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3505 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3506 for (i=0;i<pcis->n;i++) { 3507 if (array[i] == 1.0) { 3508 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3509 } 3510 } 3511 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3512 for (i=0;i<pcis->n;i++) { 3513 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3514 } 3515 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3516 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3517 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3518 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3519 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3520 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3521 if (pcbddc->dbg_flag > 1) { 3522 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3523 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3525 for (i=0;i<pcbddc->local_primal_size;i++) { 3526 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3527 } 3528 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3529 } 3530 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3531 } 3532 /* get back data */ 3533 *coarse_size_n = coarse_size; 3534 *local_primal_indices_n = local_primal_indices; 3535 PetscFunctionReturn(0); 3536 } 3537 3538 #undef __FUNCT__ 3539 #define __FUNCT__ "PCBDDCGlobalToLocal" 3540 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3541 { 3542 PC_IS* pcis = (PC_IS*)pc->data; 3543 IS localis_t; 3544 PetscInt i,lsize,*idxs; 3545 PetscScalar *vals; 3546 PetscErrorCode ierr; 3547 3548 PetscFunctionBegin; 3549 /* get dirichlet indices in local ordering exploiting local to global map */ 3550 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3551 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3552 for (i=0;i<lsize;i++) vals[i] = 1.0; 3553 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3554 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3555 if (idxs) { /* multilevel guard */ 3556 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3557 } 3558 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3559 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3560 ierr = PetscFree(vals);CHKERRQ(ierr); 3561 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3562 /* now compute dirichlet set in local ordering */ 3563 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3564 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3565 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3566 for (i=0,lsize=0;i<pcis->n;i++) { 3567 if (vals[i] == 1.0) { 3568 lsize++; 3569 } 3570 } 3571 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3572 for (i=0,lsize=0;i<pcis->n;i++) { 3573 if (vals[i] == 1.0) { 3574 idxs[lsize++] = i; 3575 } 3576 } 3577 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3578 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3579 *localis = localis_t; 3580 PetscFunctionReturn(0); 3581 } 3582