1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscInt i; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 54 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 55 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 for (i=0;i<pcbddc->n_ISForDofs;i++) { 59 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 60 } 61 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 62 pcbddc->n_ISForDofs = 0; 63 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 64 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 65 PetscFunctionReturn(0); 66 } 67 68 #undef __FUNCT__ 69 #define __FUNCT__ "PCBDDCResetTopography" 70 PetscErrorCode PCBDDCResetTopography(PC pc) 71 { 72 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 77 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 78 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCResetSolvers" 84 PetscErrorCode PCBDDCResetSolvers(PC pc) 85 { 86 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 87 PetscErrorCode ierr; 88 89 PetscFunctionBegin; 90 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 91 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 111 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 118 { 119 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 120 PC_IS *pcis = (PC_IS*)pc->data; 121 VecType impVecType; 122 PetscInt n_constraints,n_R,old_size; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 if (!pcbddc->ConstraintMatrix) { 127 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 128 } 129 /* get sizes */ 130 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 131 n_R = pcis->n-pcbddc->n_actual_vertices; 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 /* local work vectors (try to avoid unneeded work)*/ 134 /* R nodes */ 135 old_size = -1; 136 if (pcbddc->vec1_R) { 137 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 138 } 139 if (n_R != old_size) { 140 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 141 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 145 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 146 } 147 /* local primal dofs */ 148 old_size = -1; 149 if (pcbddc->vec1_P) { 150 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 151 } 152 if (pcbddc->local_primal_size != old_size) { 153 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 156 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 157 } 158 /* local explicit constraints */ 159 old_size = -1; 160 if (pcbddc->vec1_C) { 161 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 162 } 163 if (n_constraints && n_constraints != old_size) { 164 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 167 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCBDDCSetUpCorrection" 174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 175 { 176 PetscErrorCode ierr; 177 /* pointers to pcis and pcbddc */ 178 PC_IS* pcis = (PC_IS*)pc->data; 179 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 180 /* submatrices of local problem */ 181 Mat A_RV,A_VR,A_VV; 182 /* working matrices */ 183 Mat M1,M2,M3,C_CR; 184 /* working vectors */ 185 Vec vec1_C,vec2_C,vec1_V,vec2_V; 186 /* additional working stuff */ 187 IS is_aux; 188 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 189 const PetscScalar *array,*row_cmat_values; 190 const PetscInt *row_cmat_indices,*idx_R_local; 191 PetscInt *idx_V_B,*auxindices; 192 PetscInt n_vertices,n_constraints,size_of_constraint; 193 PetscInt i,j,n_R,n_D,n_B; 194 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 195 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 196 MatType impMatType; 197 /* some shortcuts to scalars */ 198 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 199 /* for debugging purposes */ 200 PetscReal *coarsefunctions_errors,*constraints_errors; 201 202 PetscFunctionBegin; 203 /* get number of vertices (corners plus constraints with change of basis) 204 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 205 n_vertices = pcbddc->n_actual_vertices; 206 n_constraints = pcbddc->local_primal_size-n_vertices; 207 /* Set Non-overlapping dimensions */ 208 n_B = pcis->n_B; n_D = pcis->n - n_B; 209 n_R = pcis->n-n_vertices; 210 211 /* Set types for local objects needed by BDDC precondtioner */ 212 impMatType = MATSEQDENSE; 213 214 /* Allocating some extra storage just to be safe */ 215 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 216 for (i=0;i<pcis->n;i++) auxindices[i]=i; 217 218 /* vertices in boundary numbering */ 219 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 220 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 221 if (i != n_vertices) { 222 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 223 } 224 225 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 226 if (n_constraints) { 227 /* see if we can save some allocations */ 228 if (pcbddc->local_auxmat2) { 229 PetscInt on_R,on_constraints; 230 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 231 if (on_R != n_R || on_constraints != n_constraints) { 232 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 233 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 234 } 235 } 236 /* work vectors */ 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 239 /* auxiliary matrices */ 240 if (!pcbddc->local_auxmat2) { 241 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 242 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 243 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 244 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 245 } 246 247 /* Extract constraints on R nodes: C_{CR} */ 248 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 249 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 250 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 251 252 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 253 for (i=0;i<n_constraints;i++) { 254 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 255 /* Get row of constraint matrix in R numbering */ 256 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 257 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 258 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 259 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 260 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 261 /* Solve for row of constraint matrix in R numbering */ 262 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 263 /* Set values in local_auxmat2 */ 264 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 265 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 266 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 267 } 268 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 271 272 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 273 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 274 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 275 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 276 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 277 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 278 ierr = MatSetUp(M1);CHKERRQ(ierr); 279 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 280 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 281 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 282 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 283 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 284 ierr = MatDestroy(&M2);CHKERRQ(ierr); 285 ierr = MatDestroy(&M3);CHKERRQ(ierr); 286 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 287 if (!pcbddc->local_auxmat1) { 288 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 289 } else { 290 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 291 } 292 } 293 294 /* Get submatrices from subdomain matrix */ 295 if (n_vertices) { 296 PetscInt ibs,mbs; 297 PetscBool issbaij; 298 Mat newmat; 299 300 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 301 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 302 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 303 if (ibs != mbs) { /* need to convert to SEQAIJ */ 304 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 308 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 309 } else { 310 /* this is safe */ 311 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 313 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 314 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 315 /* which of the two approaches is faster? */ 316 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 317 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 318 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 319 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 320 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 321 } else { 322 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 323 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 324 } 325 } 326 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 327 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 328 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 329 } 330 331 /* Matrix of coarse basis functions (local) */ 332 if (pcbddc->coarse_phi_B) { 333 PetscInt on_B,on_primal; 334 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 335 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 336 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 337 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 338 } 339 } 340 if (pcbddc->coarse_phi_D) { 341 PetscInt on_D,on_primal; 342 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 343 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 344 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 345 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 346 } 347 } 348 if (!pcbddc->coarse_phi_B) { 349 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 350 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 351 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 352 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 353 } 354 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 355 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 356 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 357 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 358 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 359 } 360 361 if (pcbddc->dbg_flag) { 362 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 363 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 364 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 365 } 366 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 367 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 368 369 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 370 371 /* vertices */ 372 for (i=0;i<n_vertices;i++) { 373 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 374 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 375 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 376 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 377 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 378 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 379 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 380 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 383 if (n_constraints) { 384 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 385 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 387 } 388 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 389 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 390 391 /* Set values in coarse basis function and subdomain part of coarse_mat */ 392 /* coarse basis functions */ 393 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 394 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 397 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 400 if (pcbddc->switch_static || pcbddc->dbg_flag) { 401 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 404 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 406 } 407 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 408 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 409 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 if (n_constraints) { 412 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 } 416 417 /* check */ 418 if (pcbddc->dbg_flag) { 419 /* assemble subdomain vector on local nodes */ 420 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 421 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 426 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 427 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 428 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 429 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 430 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 if (n_constraints) { 433 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 } 437 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 440 /* check saddle point solution */ 441 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 444 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 445 /* shift by the identity matrix */ 446 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 447 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 450 } 451 } 452 453 /* constraints */ 454 for (i=0;i<n_constraints;i++) { 455 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 456 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 457 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 458 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 459 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 460 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 461 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 462 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 463 if (n_vertices) { 464 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 465 } 466 /* Set values in coarse basis function and subdomain part of coarse_mat */ 467 /* coarse basis functions */ 468 j = i+n_vertices; /* don't touch this! */ 469 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 470 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 473 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 474 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 476 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 481 } 482 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 483 if (n_vertices) { 484 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 485 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 487 } 488 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 491 492 if (pcbddc->dbg_flag) { 493 /* assemble subdomain vector on nodes */ 494 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 495 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 496 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 498 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 499 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 500 /* assemble subdomain vector of lagrange multipliers */ 501 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 502 if (n_vertices) { 503 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 504 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 505 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 506 } 507 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 508 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 510 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 513 /* check saddle point solution */ 514 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 517 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 518 /* shift by the identity matrix */ 519 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 520 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 523 } 524 } 525 /* call assembling routines for local coarse basis */ 526 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 if (pcbddc->switch_static || pcbddc->dbg_flag) { 529 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 } 532 533 /* compute other basis functions for non-symmetric problems */ 534 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 535 if (!setsym || (setsym && !issym)) { 536 if (!pcbddc->coarse_psi_B) { 537 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 538 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 539 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 540 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 541 } 542 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 543 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 544 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 545 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 546 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 547 } 548 for (i=0;i<pcbddc->local_primal_size;i++) { 549 if (n_constraints) { 550 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 551 for (j=0;j<n_constraints;j++) { 552 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 553 } 554 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 555 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 556 } 557 if (i<n_vertices) { 558 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 559 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 560 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 561 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 562 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 563 if (n_constraints) { 564 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 565 } 566 } else { 567 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 568 } 569 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 570 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 571 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 574 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 575 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 576 if (i<n_vertices) { 577 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 578 } 579 if (pcbddc->switch_static || pcbddc->dbg_flag) { 580 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 583 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 584 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 585 } 586 587 if (pcbddc->dbg_flag) { 588 /* assemble subdomain vector on nodes */ 589 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 590 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 591 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 592 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 593 if (i<n_vertices) { 594 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 595 } 596 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 597 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 598 /* assemble subdomain vector of lagrange multipliers */ 599 for (j=0;j<pcbddc->local_primal_size;j++) { 600 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 601 } 602 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 603 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 604 /* check saddle point solution */ 605 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 608 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 609 /* shift by the identity matrix */ 610 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 611 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 614 } 615 } 616 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 if (pcbddc->switch_static || pcbddc->dbg_flag) { 619 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 } 622 } 623 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 624 /* Checking coarse_sub_mat and coarse basis functios */ 625 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 626 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 627 if (pcbddc->dbg_flag) { 628 Mat coarse_sub_mat; 629 Mat AUXMAT,TM1,TM2,TM3,TM4; 630 Mat coarse_phi_D,coarse_phi_B; 631 Mat coarse_psi_D,coarse_psi_B; 632 Mat A_II,A_BB,A_IB,A_BI; 633 MatType checkmattype=MATSEQAIJ; 634 PetscReal real_value; 635 636 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 637 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 638 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 639 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 640 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 641 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 642 if (pcbddc->coarse_psi_B) { 643 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 644 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 645 } 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 647 648 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 649 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 650 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 651 if (pcbddc->coarse_psi_B) { 652 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 653 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 654 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 655 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 656 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 657 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 658 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 659 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 660 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 661 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 662 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 663 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 664 } else { 665 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 666 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 667 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 668 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 669 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 670 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 671 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 672 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 673 } 674 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 675 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 676 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 677 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 678 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 679 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 680 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 685 for (i=0;i<pcbddc->local_primal_size;i++) { 686 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 687 } 688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 689 for (i=0;i<pcbddc->local_primal_size;i++) { 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 691 } 692 if (pcbddc->coarse_psi_B) { 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 694 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 696 } 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 698 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 700 } 701 } 702 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 703 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 704 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 705 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 706 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 707 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 708 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 709 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 710 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 711 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 712 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 713 if (pcbddc->coarse_psi_B) { 714 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 715 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 716 } 717 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 718 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 719 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 720 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 721 } 722 /* free memory */ 723 if (n_vertices) { 724 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 725 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 726 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 727 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 729 } 730 if (n_constraints) { 731 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 732 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 733 ierr = MatDestroy(&M1);CHKERRQ(ierr); 734 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 735 } 736 ierr = PetscFree(auxindices);CHKERRQ(ierr); 737 /* get back data */ 738 *coarse_submat_vals_n = coarse_submat_vals; 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 744 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 745 { 746 PC_IS* pcis = (PC_IS*)(pc->data); 747 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 748 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 749 PetscBool issbaij,isbaij; 750 /* manage repeated solves */ 751 MatReuse reuse; 752 MatStructure matstruct; 753 PetscErrorCode ierr; 754 755 PetscFunctionBegin; 756 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 757 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 758 } 759 /* get mat flags */ 760 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 761 reuse = MAT_INITIAL_MATRIX; 762 if (pc->setupcalled) { 763 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 764 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 765 if (matstruct == SAME_NONZERO_PATTERN) { 766 reuse = MAT_REUSE_MATRIX; 767 } else { 768 reuse = MAT_INITIAL_MATRIX; 769 } 770 } 771 if (reuse == MAT_INITIAL_MATRIX) { 772 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 773 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 774 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 775 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 776 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 777 } 778 779 /* transform local matrices if needed */ 780 if (pcbddc->use_change_of_basis) { 781 Mat change_mat_all; 782 PetscScalar *row_cmat_values; 783 PetscInt *row_cmat_indices; 784 PetscInt *nnz,*is_indices,*temp_indices; 785 PetscInt i,j,k,n_D,n_B; 786 787 /* Get Non-overlapping dimensions */ 788 n_B = pcis->n_B; 789 n_D = pcis->n-n_B; 790 791 /* compute nonzero structure of change of basis on all local nodes */ 792 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 793 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 794 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 795 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 796 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 797 k=1; 798 for (i=0;i<n_B;i++) { 799 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 800 nnz[is_indices[i]]=j; 801 if (k < j) k = j; 802 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 803 } 804 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 805 /* assemble change of basis matrix on the whole set of local dofs */ 806 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 807 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 808 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 809 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 810 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 811 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 812 for (i=0;i<n_D;i++) { 813 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 814 } 815 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 816 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 817 for (i=0;i<n_B;i++) { 818 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 819 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 820 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 821 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 822 } 823 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 824 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 825 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 826 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 827 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 828 if (!issbaij && !isbaij) { 829 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 830 } else { 831 Mat work_mat; 832 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 833 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 834 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 835 } 836 /* 837 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 838 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 839 */ 840 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 841 ierr = PetscFree(nnz);CHKERRQ(ierr); 842 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 843 } else { 844 /* without change of basis, the local matrix is unchanged */ 845 if (!pcbddc->local_mat) { 846 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 847 pcbddc->local_mat = matis->A; 848 } 849 } 850 851 /* get submatrices */ 852 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 853 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 854 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 855 if (!issbaij) { 856 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 858 } else { 859 Mat newmat; 860 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 861 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 862 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 863 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 864 } 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 870 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 871 { 872 PC_IS* pcis = (PC_IS*)(pc->data); 873 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 874 IS is_aux1,is_aux2; 875 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 876 PetscInt n_vertices,i,j,n_R,n_D,n_B; 877 PetscInt vbs,bs; 878 PetscBT bitmask; 879 PetscErrorCode ierr; 880 881 PetscFunctionBegin; 882 /* No need to setup local scatters if primal space is unchanged */ 883 if (!pcbddc->new_primal_space_local) { 884 PetscFunctionReturn(0); 885 } 886 /* destroy old objects */ 887 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 888 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 889 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 890 /* Set Non-overlapping dimensions */ 891 n_B = pcis->n_B; n_D = pcis->n - n_B; 892 n_vertices = pcbddc->n_actual_vertices; 893 /* create auxiliary bitmask */ 894 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 895 for (i=0;i<n_vertices;i++) { 896 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 897 } 898 899 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 900 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 901 for (i=0, n_R=0; i<pcis->n; i++) { 902 if (!PetscBTLookup(bitmask,i)) { 903 idx_R_local[n_R] = i; 904 n_R++; 905 } 906 } 907 908 /* Block code */ 909 vbs = 1; 910 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 911 if (bs>1 && !(n_vertices%bs)) { 912 PetscBool is_blocked = PETSC_TRUE; 913 PetscInt *vary; 914 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 915 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 916 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 917 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 918 for (i=0; i<n_vertices; i++) { 919 if (vary[i]!=0 && vary[i]!=bs) { 920 is_blocked = PETSC_FALSE; 921 break; 922 } 923 } 924 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 925 vbs = bs; 926 for (i=0;i<n_R/vbs;i++) { 927 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 928 } 929 } 930 ierr = PetscFree(vary);CHKERRQ(ierr); 931 } 932 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 933 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 934 935 /* print some info if requested */ 936 if (pcbddc->dbg_flag) { 937 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 938 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 939 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 940 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 941 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 942 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 943 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 945 } 946 947 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 948 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 949 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 950 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 951 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 952 for (i=0; i<n_D; i++) { 953 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 954 } 955 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 956 for (i=0, j=0; i<n_R; i++) { 957 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 958 aux_array1[j++] = i; 959 } 960 } 961 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 962 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 963 for (i=0, j=0; i<n_B; i++) { 964 if (!PetscBTLookup(bitmask,is_indices[i])) { 965 aux_array2[j++] = i; 966 } 967 } 968 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 969 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 970 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 971 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 972 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 973 974 if (pcbddc->switch_static || pcbddc->dbg_flag) { 975 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 976 for (i=0, j=0; i<n_R; i++) { 977 if (PetscBTLookup(bitmask,idx_R_local[i])) { 978 aux_array1[j++] = i; 979 } 980 } 981 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 982 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 983 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 984 } 985 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 986 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 993 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 994 { 995 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 996 PC_IS *pcis = (PC_IS*)pc->data; 997 PC pc_temp; 998 Mat A_RR; 999 MatStructure matstruct; 1000 MatReuse reuse; 1001 PetscScalar m_one = -1.0; 1002 PetscReal value; 1003 PetscInt n_D,n_R,ibs,mbs; 1004 PetscBool use_exact,use_exact_reduced,issbaij; 1005 PetscErrorCode ierr; 1006 /* prefixes stuff */ 1007 char dir_prefix[256],neu_prefix[256],str_level[3]; 1008 size_t len; 1009 1010 PetscFunctionBegin; 1011 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1012 1013 /* compute prefixes */ 1014 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1015 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1016 if (!pcbddc->current_level) { 1017 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1018 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1019 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1020 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1021 } else { 1022 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1023 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1024 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1025 len -= 15; /* remove "pc_bddc_coarse_" */ 1026 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1027 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1028 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1029 *(dir_prefix+len)='\0'; 1030 *(neu_prefix+len)='\0'; 1031 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1032 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1033 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1034 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1035 } 1036 1037 /* DIRICHLET PROBLEM */ 1038 /* Matrix for Dirichlet problem is pcis->A_II */ 1039 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1040 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1041 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1042 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1043 /* default */ 1044 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1045 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1046 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1047 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1048 if (issbaij) { 1049 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1050 } else { 1051 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1052 } 1053 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1054 } 1055 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1056 /* Allow user's customization */ 1057 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1058 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1059 if (!n_D) { 1060 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1061 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1062 } 1063 /* Set Up KSP for Dirichlet problem of BDDC */ 1064 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1065 /* set ksp_D into pcis data */ 1066 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1067 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1068 pcis->ksp_D = pcbddc->ksp_D; 1069 1070 /* NEUMANN PROBLEM */ 1071 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1072 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1073 if (pcbddc->ksp_R) { /* already created ksp */ 1074 PetscInt nn_R; 1075 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1076 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1077 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1078 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1079 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1080 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1081 reuse = MAT_INITIAL_MATRIX; 1082 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1083 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1084 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1085 reuse = MAT_INITIAL_MATRIX; 1086 } else { /* safe to reuse the matrix */ 1087 reuse = MAT_REUSE_MATRIX; 1088 } 1089 } 1090 /* last check */ 1091 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1092 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1093 reuse = MAT_INITIAL_MATRIX; 1094 } 1095 } else { /* first time, so we need to create the matrix */ 1096 reuse = MAT_INITIAL_MATRIX; 1097 } 1098 /* extract A_RR */ 1099 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1100 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1101 if (ibs != mbs) { 1102 Mat newmat; 1103 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1104 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1105 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1106 } else { 1107 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1108 } 1109 if (!pcbddc->ksp_R) { /* create object if not present */ 1110 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1111 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1112 /* default */ 1113 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1114 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1115 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1116 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1117 if (issbaij) { 1118 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1119 } else { 1120 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1121 } 1122 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1123 } 1124 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1125 /* Allow user's customization */ 1126 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1127 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1128 if (!n_R) { 1129 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1130 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1131 } 1132 /* Set Up KSP for Neumann problem of BDDC */ 1133 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1134 1135 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1136 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1137 /* Dirichlet */ 1138 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1139 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1140 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1141 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1142 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1143 /* need to be adapted? */ 1144 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1145 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1146 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1147 /* print info */ 1148 if (pcbddc->dbg_flag) { 1149 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1150 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1152 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1153 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1154 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1155 } 1156 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1157 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1158 } 1159 1160 /* Neumann */ 1161 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1162 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1163 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1164 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1165 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1166 /* need to be adapted? */ 1167 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1168 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1169 /* print info */ 1170 if (pcbddc->dbg_flag) { 1171 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1172 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1173 } 1174 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1175 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1176 } 1177 } 1178 /* free Neumann problem's matrix */ 1179 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1185 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1186 { 1187 PetscErrorCode ierr; 1188 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1189 1190 PetscFunctionBegin; 1191 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1192 if (pcbddc->local_auxmat1) { 1193 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1194 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1195 } 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1201 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1202 { 1203 PetscErrorCode ierr; 1204 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1205 PC_IS* pcis = (PC_IS*) (pc->data); 1206 const PetscScalar zero = 0.0; 1207 1208 PetscFunctionBegin; 1209 /* Application of PHI^T (or PSI^T) */ 1210 if (pcbddc->coarse_psi_B) { 1211 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1212 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1213 } else { 1214 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1215 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1216 } 1217 /* Scatter data of coarse_rhs */ 1218 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1219 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1220 1221 /* Local solution on R nodes */ 1222 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1223 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 if (pcbddc->switch_static) { 1226 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1227 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1228 } 1229 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1230 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1231 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 if (pcbddc->switch_static) { 1234 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1236 } 1237 1238 /* Coarse solution */ 1239 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1241 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1242 } 1243 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1244 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1245 1246 /* Sum contributions from two levels */ 1247 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1248 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1254 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1255 { 1256 PetscErrorCode ierr; 1257 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1258 1259 PetscFunctionBegin; 1260 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #undef __FUNCT__ 1265 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1266 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1267 { 1268 PetscErrorCode ierr; 1269 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1270 1271 PetscFunctionBegin; 1272 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 /* uncomment for testing purposes */ 1277 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1280 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1281 { 1282 PetscErrorCode ierr; 1283 PC_IS* pcis = (PC_IS*)(pc->data); 1284 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1285 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1286 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1287 MatType impMatType=MATSEQAIJ; 1288 /* one and zero */ 1289 PetscScalar one=1.0,zero=0.0; 1290 /* space to store constraints and their local indices */ 1291 PetscScalar *temp_quadrature_constraint; 1292 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1293 /* iterators */ 1294 PetscInt i,j,k,total_counts,temp_start_ptr; 1295 /* stuff to store connected components stored in pcbddc->mat_graph */ 1296 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1297 PetscInt n_ISForFaces,n_ISForEdges; 1298 /* near null space stuff */ 1299 MatNullSpace nearnullsp; 1300 const Vec *nearnullvecs; 1301 Vec *localnearnullsp; 1302 PetscBool nnsp_has_cnst; 1303 PetscInt nnsp_size; 1304 PetscScalar *array; 1305 /* BLAS integers */ 1306 PetscBLASInt lwork,lierr; 1307 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1308 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1309 /* LAPACK working arrays for SVD or POD */ 1310 PetscBool skip_lapack; 1311 PetscScalar *work; 1312 PetscReal *singular_vals; 1313 #if defined(PETSC_USE_COMPLEX) 1314 PetscReal *rwork; 1315 #endif 1316 #if defined(PETSC_MISSING_LAPACK_GESVD) 1317 PetscBLASInt Blas_one_2=1; 1318 PetscScalar *temp_basis,*correlation_mat; 1319 #else 1320 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1321 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1322 #endif 1323 /* reuse */ 1324 PetscInt olocal_primal_size; 1325 PetscInt *oprimal_indices_local_idxs; 1326 /* change of basis */ 1327 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1328 PetscBool boolforchange,qr_needed; 1329 PetscBT touched,change_basis,qr_needed_idx; 1330 /* auxiliary stuff */ 1331 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1332 /* some quantities */ 1333 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1334 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1335 1336 1337 PetscFunctionBegin; 1338 /* Destroy Mat objects computed previously */ 1339 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1341 /* Get index sets for faces, edges and vertices from graph */ 1342 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1343 pcbddc->use_vertices = PETSC_TRUE; 1344 } 1345 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1346 /* HACK: provide functions to set change of basis */ 1347 if (!ISForVertices && pcbddc->NullSpace) { 1348 pcbddc->use_change_of_basis = PETSC_TRUE; 1349 pcbddc->use_change_on_faces = PETSC_FALSE; 1350 } 1351 /* print some info */ 1352 if (pcbddc->dbg_flag) { 1353 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1354 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1355 i = 0; 1356 if (ISForVertices) { 1357 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1358 } 1359 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1360 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1362 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1363 } 1364 /* check if near null space is attached to global mat */ 1365 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1366 if (nearnullsp) { 1367 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1368 /* remove any stored info */ 1369 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1370 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1371 /* store information for BDDC solver reuse */ 1372 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1373 pcbddc->onearnullspace = nearnullsp; 1374 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1375 for (i=0;i<nnsp_size;i++) { 1376 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1377 } 1378 } else { /* if near null space is not provided BDDC uses constants by default */ 1379 nnsp_size = 0; 1380 nnsp_has_cnst = PETSC_TRUE; 1381 } 1382 /* get max number of constraints on a single cc */ 1383 max_constraints = nnsp_size; 1384 if (nnsp_has_cnst) max_constraints++; 1385 1386 /* 1387 Evaluate maximum storage size needed by the procedure 1388 - temp_indices will contain start index of each constraint stored as follows 1389 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1390 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1391 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1392 */ 1393 total_counts = n_ISForFaces+n_ISForEdges; 1394 total_counts *= max_constraints; 1395 n_vertices = 0; 1396 if (ISForVertices) { 1397 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1398 } 1399 total_counts += n_vertices; 1400 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1401 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1402 total_counts = 0; 1403 max_size_of_constraint = 0; 1404 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1405 if (i<n_ISForEdges) { 1406 used_IS = &ISForEdges[i]; 1407 } else { 1408 used_IS = &ISForFaces[i-n_ISForEdges]; 1409 } 1410 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1411 total_counts += j; 1412 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1413 } 1414 total_counts *= max_constraints; 1415 total_counts += n_vertices; 1416 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1417 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1418 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1419 /* get local part of global near null space vectors */ 1420 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1421 for (k=0;k<nnsp_size;k++) { 1422 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1423 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1424 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1425 } 1426 1427 /* whether or not to skip lapack calls */ 1428 skip_lapack = PETSC_TRUE; 1429 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1430 1431 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1432 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1433 PetscScalar temp_work; 1434 #if defined(PETSC_MISSING_LAPACK_GESVD) 1435 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1436 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1437 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1438 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1439 #if defined(PETSC_USE_COMPLEX) 1440 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1441 #endif 1442 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1443 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1444 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1445 lwork = -1; 1446 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1447 #if !defined(PETSC_USE_COMPLEX) 1448 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1449 #else 1450 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1451 #endif 1452 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1453 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1454 #else /* on missing GESVD */ 1455 /* SVD */ 1456 PetscInt max_n,min_n; 1457 max_n = max_size_of_constraint; 1458 min_n = max_constraints; 1459 if (max_size_of_constraint < max_constraints) { 1460 min_n = max_size_of_constraint; 1461 max_n = max_constraints; 1462 } 1463 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1464 #if defined(PETSC_USE_COMPLEX) 1465 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1466 #endif 1467 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1468 lwork = -1; 1469 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1470 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1471 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1472 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1473 #if !defined(PETSC_USE_COMPLEX) 1474 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1475 #else 1476 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1477 #endif 1478 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1479 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1480 #endif /* on missing GESVD */ 1481 /* Allocate optimal workspace */ 1482 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1483 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1484 } 1485 /* Now we can loop on constraining sets */ 1486 total_counts = 0; 1487 temp_indices[0] = 0; 1488 /* vertices */ 1489 if (ISForVertices) { 1490 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1491 if (nnsp_has_cnst) { /* consider all vertices */ 1492 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1493 for (i=0;i<n_vertices;i++) { 1494 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1495 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1496 total_counts++; 1497 } 1498 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1499 PetscBool used_vertex; 1500 for (i=0;i<n_vertices;i++) { 1501 used_vertex = PETSC_FALSE; 1502 k = 0; 1503 while (!used_vertex && k<nnsp_size) { 1504 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1505 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1506 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1507 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1508 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1509 total_counts++; 1510 used_vertex = PETSC_TRUE; 1511 } 1512 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1513 k++; 1514 } 1515 } 1516 } 1517 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1518 n_vertices = total_counts; 1519 } 1520 1521 /* edges and faces */ 1522 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1523 if (i<n_ISForEdges) { 1524 used_IS = &ISForEdges[i]; 1525 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1526 } else { 1527 used_IS = &ISForFaces[i-n_ISForEdges]; 1528 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1529 } 1530 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1531 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1532 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1533 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1534 /* change of basis should not be performed on local periodic nodes */ 1535 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1536 if (nnsp_has_cnst) { 1537 PetscScalar quad_value; 1538 temp_constraints++; 1539 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1540 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1541 for (j=0;j<size_of_constraint;j++) { 1542 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1543 } 1544 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1545 total_counts++; 1546 } 1547 for (k=0;k<nnsp_size;k++) { 1548 PetscReal real_value; 1549 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1550 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1551 for (j=0;j<size_of_constraint;j++) { 1552 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1553 } 1554 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1555 /* check if array is null on the connected component */ 1556 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1557 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1558 if (real_value > 0.0) { /* keep indices and values */ 1559 temp_constraints++; 1560 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1561 total_counts++; 1562 } 1563 } 1564 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1565 valid_constraints = temp_constraints; 1566 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1567 if (!pcbddc->use_nnsp_true && temp_constraints) { 1568 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1569 1570 #if defined(PETSC_MISSING_LAPACK_GESVD) 1571 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1572 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1573 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1574 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1575 from that computed using LAPACKgesvd 1576 -> This is due to a different computation of eigenvectors in LAPACKheev 1577 -> The quality of the POD-computed basis will be the same */ 1578 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1579 /* Store upper triangular part of correlation matrix */ 1580 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1581 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1582 for (j=0;j<temp_constraints;j++) { 1583 for (k=0;k<j+1;k++) { 1584 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1585 } 1586 } 1587 /* compute eigenvalues and eigenvectors of correlation matrix */ 1588 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1589 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1590 #if !defined(PETSC_USE_COMPLEX) 1591 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1592 #else 1593 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1594 #endif 1595 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1596 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1597 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1598 j=0; 1599 while (j < temp_constraints && singular_vals[j] < tol) j++; 1600 total_counts=total_counts-j; 1601 valid_constraints = temp_constraints-j; 1602 /* scale and copy POD basis into used quadrature memory */ 1603 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1604 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1605 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1606 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1607 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1608 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1609 if (j<temp_constraints) { 1610 PetscInt ii; 1611 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1612 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1613 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1614 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1615 for (k=0;k<temp_constraints-j;k++) { 1616 for (ii=0;ii<size_of_constraint;ii++) { 1617 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1618 } 1619 } 1620 } 1621 #else /* on missing GESVD */ 1622 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1623 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1624 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1625 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1626 #if !defined(PETSC_USE_COMPLEX) 1627 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1628 #else 1629 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1630 #endif 1631 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1632 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1633 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1634 k = temp_constraints; 1635 if (k > size_of_constraint) k = size_of_constraint; 1636 j = 0; 1637 while (j < k && singular_vals[k-j-1] < tol) j++; 1638 total_counts = total_counts-temp_constraints+k-j; 1639 valid_constraints = k-j; 1640 #endif /* on missing GESVD */ 1641 } 1642 /* setting change_of_basis flag is safe now */ 1643 if (boolforchange) { 1644 for (j=0;j<valid_constraints;j++) { 1645 PetscBTSet(change_basis,total_counts-j-1); 1646 } 1647 } 1648 } 1649 /* free index sets of faces, edges and vertices */ 1650 for (i=0;i<n_ISForFaces;i++) { 1651 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1652 } 1653 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1654 for (i=0;i<n_ISForEdges;i++) { 1655 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1656 } 1657 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1658 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1659 /* map temp_indices_to_constraint in boundary numbering */ 1660 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1661 if (i != temp_indices[total_counts]) { 1662 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1663 } 1664 1665 /* free workspace */ 1666 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1667 ierr = PetscFree(work);CHKERRQ(ierr); 1668 #if defined(PETSC_USE_COMPLEX) 1669 ierr = PetscFree(rwork);CHKERRQ(ierr); 1670 #endif 1671 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1672 #if defined(PETSC_MISSING_LAPACK_GESVD) 1673 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1674 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1675 #endif 1676 } 1677 for (k=0;k<nnsp_size;k++) { 1678 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1679 } 1680 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1681 1682 /* set quantities in pcbddc data structure and store previous primal size */ 1683 /* n_vertices defines the number of subdomain corners in the primal space */ 1684 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1685 olocal_primal_size = pcbddc->local_primal_size; 1686 pcbddc->local_primal_size = total_counts; 1687 pcbddc->n_vertices = n_vertices; 1688 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1689 1690 /* Create constraint matrix */ 1691 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1692 /* so we need to set it up properly either with or without change of basis */ 1693 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1694 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1695 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1696 /* array to compute a local numbering of constraints : vertices first then constraints */ 1697 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1698 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1699 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1700 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1701 /* auxiliary stuff for basis change */ 1702 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1703 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1704 1705 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1706 total_primal_vertices=0; 1707 for (i=0;i<pcbddc->local_primal_size;i++) { 1708 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1709 if (size_of_constraint == 1) { 1710 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1711 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1712 aux_primal_minloc[total_primal_vertices]=0; 1713 total_primal_vertices++; 1714 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1715 PetscInt min_loc,min_index; 1716 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1717 /* find first untouched local node */ 1718 k = 0; 1719 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1720 min_index = global_indices[k]; 1721 min_loc = k; 1722 /* search the minimum among global nodes already untouched on the cc */ 1723 for (k=1;k<size_of_constraint;k++) { 1724 /* there can be more than one constraint on a single connected component */ 1725 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1726 min_index = global_indices[k]; 1727 min_loc = k; 1728 } 1729 } 1730 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1731 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1732 aux_primal_minloc[total_primal_vertices]=min_loc; 1733 total_primal_vertices++; 1734 } 1735 } 1736 /* determine if a QR strategy is needed for change of basis */ 1737 qr_needed = PETSC_FALSE; 1738 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1739 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1740 if (PetscBTLookup(change_basis,i)) { 1741 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1742 j = 0; 1743 for (k=0;k<size_of_constraint;k++) { 1744 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1745 j++; 1746 } 1747 } 1748 /* found more than one primal dof on the cc */ 1749 if (j > 1) { 1750 PetscBTSet(qr_needed_idx,i); 1751 qr_needed = PETSC_TRUE; 1752 } 1753 } 1754 } 1755 /* free workspace */ 1756 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1757 1758 /* permute indices in order to have a sorted set of vertices */ 1759 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1760 1761 /* nonzero structure of constraint matrix */ 1762 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1763 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1764 j=total_primal_vertices; 1765 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1766 if (!PetscBTLookup(change_basis,i)) { 1767 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1768 j++; 1769 } 1770 } 1771 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1772 ierr = PetscFree(nnz);CHKERRQ(ierr); 1773 /* set values in constraint matrix */ 1774 for (i=0;i<total_primal_vertices;i++) { 1775 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1776 } 1777 total_counts = total_primal_vertices; 1778 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1779 if (!PetscBTLookup(change_basis,i)) { 1780 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1781 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1782 total_counts++; 1783 } 1784 } 1785 /* assembling */ 1786 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1787 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1788 /* 1789 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1790 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1791 */ 1792 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1793 if (pcbddc->use_change_of_basis) { 1794 /* dual and primal dofs on a single cc */ 1795 PetscInt dual_dofs,primal_dofs; 1796 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1797 PetscInt primal_counter; 1798 /* working stuff for GEQRF */ 1799 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1800 PetscBLASInt lqr_work; 1801 /* working stuff for UNGQR */ 1802 PetscScalar *gqr_work,lgqr_work_t; 1803 PetscBLASInt lgqr_work; 1804 /* working stuff for TRTRS */ 1805 PetscScalar *trs_rhs; 1806 PetscBLASInt Blas_NRHS; 1807 /* pointers for values insertion into change of basis matrix */ 1808 PetscInt *start_rows,*start_cols; 1809 PetscScalar *start_vals; 1810 /* working stuff for values insertion */ 1811 PetscBT is_primal; 1812 1813 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1814 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1815 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1816 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1817 /* work arrays */ 1818 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1819 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1820 /* nonzeros per row */ 1821 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1822 if (PetscBTLookup(change_basis,i)) { 1823 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1824 if (PetscBTLookup(qr_needed_idx,i)) { 1825 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1826 } else { 1827 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1828 /* get local primal index on the cc */ 1829 j = 0; 1830 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1831 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1832 } 1833 } 1834 } 1835 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1836 ierr = PetscFree(nnz);CHKERRQ(ierr); 1837 /* Set initial identity in the matrix */ 1838 for (i=0;i<pcis->n_B;i++) { 1839 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1840 } 1841 1842 if (pcbddc->dbg_flag) { 1843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1844 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1845 } 1846 1847 1848 /* Now we loop on the constraints which need a change of basis */ 1849 /* 1850 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1851 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1852 1853 Basic blocks of change of basis matrix T computed 1854 1855 - Using the following block transformation if there is only a primal dof on the cc 1856 (in the example, primal dof is the last one of the edge in LOCAL ordering 1857 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1858 | 1 0 ... 0 1 | 1859 | 0 1 ... 0 1 | 1860 | ... | 1861 | 0 ... 1 1 | 1862 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1863 1864 - via QR decomposition of constraints otherwise 1865 */ 1866 if (qr_needed) { 1867 /* space to store Q */ 1868 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1869 /* first we issue queries for optimal work */ 1870 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1871 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1872 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1873 lqr_work = -1; 1874 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1875 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1876 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1877 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1878 lgqr_work = -1; 1879 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1880 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1881 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1882 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1883 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1884 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1885 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1886 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1887 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1888 /* array to store scaling factors for reflectors */ 1889 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1890 /* array to store rhs and solution of triangular solver */ 1891 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1892 /* allocating workspace for check */ 1893 if (pcbddc->dbg_flag) { 1894 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1895 } 1896 } 1897 /* array to store whether a node is primal or not */ 1898 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1899 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1900 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1901 if (i != total_primal_vertices) { 1902 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1903 } 1904 for (i=0;i<total_primal_vertices;i++) { 1905 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1906 } 1907 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1908 1909 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1910 /* -> using implicit ordering contained in temp_indices data */ 1911 total_counts = pcbddc->n_vertices; 1912 primal_counter = total_counts; 1913 while (total_counts<pcbddc->local_primal_size) { 1914 primal_dofs = 1; 1915 if (PetscBTLookup(change_basis,total_counts)) { 1916 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1917 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1918 primal_dofs++; 1919 } 1920 /* get constraint info */ 1921 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1922 dual_dofs = size_of_constraint-primal_dofs; 1923 1924 if (pcbddc->dbg_flag) { 1925 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1926 } 1927 1928 if (primal_dofs > 1) { /* QR */ 1929 1930 /* copy quadrature constraints for change of basis check */ 1931 if (pcbddc->dbg_flag) { 1932 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1933 } 1934 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1935 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1936 1937 /* compute QR decomposition of constraints */ 1938 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1939 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1940 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1941 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1942 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1943 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1944 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1945 1946 /* explictly compute R^-T */ 1947 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1948 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1949 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1950 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1951 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1952 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1953 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1954 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1955 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1956 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1957 1958 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1959 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1960 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1962 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1963 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1964 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1965 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1966 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1967 1968 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1969 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1970 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1971 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1972 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1973 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1975 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1977 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1978 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1979 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1980 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1981 1982 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1983 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1984 /* insert cols for primal dofs */ 1985 for (j=0;j<primal_dofs;j++) { 1986 start_vals = &qr_basis[j*size_of_constraint]; 1987 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1988 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1989 } 1990 /* insert cols for dual dofs */ 1991 for (j=0,k=0;j<dual_dofs;k++) { 1992 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1993 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1994 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1995 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1996 j++; 1997 } 1998 } 1999 2000 /* check change of basis */ 2001 if (pcbddc->dbg_flag) { 2002 PetscInt ii,jj; 2003 PetscBool valid_qr=PETSC_TRUE; 2004 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2006 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2007 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2008 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2009 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2010 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2011 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2012 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2013 for (jj=0;jj<size_of_constraint;jj++) { 2014 for (ii=0;ii<primal_dofs;ii++) { 2015 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2016 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2017 } 2018 } 2019 if (!valid_qr) { 2020 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2021 for (jj=0;jj<size_of_constraint;jj++) { 2022 for (ii=0;ii<primal_dofs;ii++) { 2023 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2024 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2025 } 2026 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2027 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2028 } 2029 } 2030 } 2031 } else { 2032 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2033 } 2034 } 2035 } else { /* simple transformation block */ 2036 PetscInt row,col; 2037 PetscScalar val; 2038 for (j=0;j<size_of_constraint;j++) { 2039 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2040 if (!PetscBTLookup(is_primal,row)) { 2041 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2042 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2043 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2044 } else { 2045 for (k=0;k<size_of_constraint;k++) { 2046 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2047 if (row != col) { 2048 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2049 } else { 2050 val = 1.0; 2051 } 2052 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2053 } 2054 } 2055 } 2056 } 2057 /* increment primal counter */ 2058 primal_counter += primal_dofs; 2059 } else { 2060 if (pcbddc->dbg_flag) { 2061 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2062 } 2063 } 2064 /* increment constraint counter total_counts */ 2065 total_counts += primal_dofs; 2066 } 2067 2068 /* free workspace */ 2069 if (qr_needed) { 2070 if (pcbddc->dbg_flag) { 2071 ierr = PetscFree(work);CHKERRQ(ierr); 2072 } 2073 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2074 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2075 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2076 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2077 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2078 } 2079 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2080 /* assembling */ 2081 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2083 /* 2084 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2085 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2086 */ 2087 } 2088 2089 /* get indices in local ordering for vertices and constraints */ 2090 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2091 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2092 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2093 } 2094 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2095 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2096 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2097 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2098 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2099 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2100 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2101 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2102 /* set quantities in PCBDDC data struct */ 2103 pcbddc->n_actual_vertices = i; 2104 /* check if a new primal space has been introduced */ 2105 pcbddc->new_primal_space_local = PETSC_TRUE; 2106 if (olocal_primal_size == pcbddc->local_primal_size) { 2107 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2108 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2109 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2110 } 2111 /* need to set pcbddc->new_primal_space_local to true when the local primal space is empty, otherwise pcbddc->is_R_local will not be created */ 2112 if (!pcbddc->local_primal_size) pcbddc->new_primal_space_local = PETSC_TRUE; 2113 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2114 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2115 2116 /* flush dbg viewer */ 2117 if (pcbddc->dbg_flag) { 2118 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2119 } 2120 2121 /* free workspace */ 2122 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2123 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2124 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2125 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2126 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2127 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2128 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2129 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2130 PetscFunctionReturn(0); 2131 } 2132 2133 #undef __FUNCT__ 2134 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2135 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2136 { 2137 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2138 PC_IS *pcis = (PC_IS*)pc->data; 2139 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2140 PetscInt bs,ierr,i,vertex_size; 2141 PetscViewer viewer=pcbddc->dbg_viewer; 2142 2143 PetscFunctionBegin; 2144 /* Reset previously computed graph */ 2145 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2146 /* Init local Graph struct */ 2147 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2148 2149 /* Check validity of the csr graph passed in by the user */ 2150 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2151 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2152 } 2153 2154 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2155 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2156 Mat mat_adj; 2157 const PetscInt *xadj,*adjncy; 2158 PetscBool flg_row=PETSC_TRUE; 2159 2160 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2161 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2162 if (!flg_row) { 2163 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2164 } 2165 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2166 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2167 if (!flg_row) { 2168 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2169 } 2170 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2171 } 2172 2173 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2174 vertex_size = 1; 2175 if (!pcbddc->user_provided_isfordofs) { 2176 if (!pcbddc->n_ISForDofs) { 2177 IS *custom_ISForDofs; 2178 2179 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2180 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2181 for (i=0;i<bs;i++) { 2182 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2183 } 2184 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2185 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2186 /* remove my references to IS objects */ 2187 for (i=0;i<bs;i++) { 2188 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2189 } 2190 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2191 } 2192 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2193 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2194 } 2195 2196 /* Setup of Graph */ 2197 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2198 2199 /* Graph's connected components analysis */ 2200 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2201 2202 /* print some info to stdout */ 2203 if (pcbddc->dbg_flag) { 2204 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2205 } 2206 2207 /* mark topography has done */ 2208 pcbddc->recompute_topography = PETSC_FALSE; 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2214 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2215 { 2216 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2217 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2218 PetscErrorCode ierr; 2219 2220 PetscFunctionBegin; 2221 n = 0; 2222 vertices = 0; 2223 if (pcbddc->ConstraintMatrix) { 2224 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2225 for (i=0;i<local_primal_size;i++) { 2226 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2227 if (size_of_constraint == 1) n++; 2228 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2229 } 2230 if (vertices_idx) { 2231 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2232 n = 0; 2233 for (i=0;i<local_primal_size;i++) { 2234 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2235 if (size_of_constraint == 1) { 2236 vertices[n++]=row_cmat_indices[0]; 2237 } 2238 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2239 } 2240 } 2241 } 2242 *n_vertices = n; 2243 if (vertices_idx) *vertices_idx = vertices; 2244 PetscFunctionReturn(0); 2245 } 2246 2247 #undef __FUNCT__ 2248 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2249 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2250 { 2251 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2252 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2253 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2254 PetscBT touched; 2255 PetscErrorCode ierr; 2256 2257 /* This function assumes that the number of local constraints per connected component 2258 is not greater than the number of nodes defined for the connected component 2259 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2260 PetscFunctionBegin; 2261 n = 0; 2262 constraints_index = 0; 2263 if (pcbddc->ConstraintMatrix) { 2264 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2265 max_size_of_constraint = 0; 2266 for (i=0;i<local_primal_size;i++) { 2267 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2268 if (size_of_constraint > 1) { 2269 n++; 2270 } 2271 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2272 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2273 } 2274 if (constraints_idx) { 2275 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2276 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2277 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2278 n = 0; 2279 for (i=0;i<local_primal_size;i++) { 2280 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2281 if (size_of_constraint > 1) { 2282 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2283 /* find first untouched local node */ 2284 j = 0; 2285 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2286 min_index = row_cmat_global_indices[j]; 2287 min_loc = j; 2288 /* search the minimum among nodes not yet touched on the connected component 2289 since there can be more than one constraint on a single cc */ 2290 for (j=1;j<size_of_constraint;j++) { 2291 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2292 min_index = row_cmat_global_indices[j]; 2293 min_loc = j; 2294 } 2295 } 2296 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2297 constraints_index[n++] = row_cmat_indices[min_loc]; 2298 } 2299 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2300 } 2301 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2302 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2303 } 2304 } 2305 *n_constraints = n; 2306 if (constraints_idx) *constraints_idx = constraints_index; 2307 PetscFunctionReturn(0); 2308 } 2309 2310 /* the next two functions has been adapted from pcis.c */ 2311 #undef __FUNCT__ 2312 #define __FUNCT__ "PCBDDCApplySchur" 2313 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2314 { 2315 PetscErrorCode ierr; 2316 PC_IS *pcis = (PC_IS*)(pc->data); 2317 2318 PetscFunctionBegin; 2319 if (!vec2_B) { vec2_B = v; } 2320 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2321 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2322 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2323 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2324 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2325 PetscFunctionReturn(0); 2326 } 2327 2328 #undef __FUNCT__ 2329 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2330 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2331 { 2332 PetscErrorCode ierr; 2333 PC_IS *pcis = (PC_IS*)(pc->data); 2334 2335 PetscFunctionBegin; 2336 if (!vec2_B) { vec2_B = v; } 2337 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2338 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2339 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2340 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2341 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2342 PetscFunctionReturn(0); 2343 } 2344 2345 #undef __FUNCT__ 2346 #define __FUNCT__ "PCBDDCSubsetNumbering" 2347 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2348 { 2349 Vec local_vec,global_vec; 2350 IS seqis,paris; 2351 VecScatter scatter_ctx; 2352 PetscScalar *array; 2353 PetscInt *temp_global_dofs; 2354 PetscScalar globalsum; 2355 PetscInt i,j,s; 2356 PetscInt nlocals,first_index,old_index,max_local; 2357 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2358 PetscMPIInt *dof_sizes,*dof_displs; 2359 PetscBool first_found; 2360 PetscErrorCode ierr; 2361 2362 PetscFunctionBegin; 2363 /* mpi buffers */ 2364 MPI_Comm_size(comm,&size_prec_comm); 2365 MPI_Comm_rank(comm,&rank_prec_comm); 2366 j = ( !rank_prec_comm ? size_prec_comm : 0); 2367 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2368 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2369 /* get maximum size of subset */ 2370 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2371 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2372 max_local = 0; 2373 if (n_local_dofs) { 2374 max_local = temp_global_dofs[0]; 2375 for (i=1;i<n_local_dofs;i++) { 2376 if (max_local < temp_global_dofs[i] ) { 2377 max_local = temp_global_dofs[i]; 2378 } 2379 } 2380 } 2381 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2382 max_global++; 2383 max_local = 0; 2384 if (n_local_dofs) { 2385 max_local = local_dofs[0]; 2386 for (i=1;i<n_local_dofs;i++) { 2387 if (max_local < local_dofs[i] ) { 2388 max_local = local_dofs[i]; 2389 } 2390 } 2391 } 2392 max_local++; 2393 /* allocate workspace */ 2394 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2395 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2396 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2397 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2398 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2399 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2400 /* create scatter */ 2401 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2402 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2403 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2404 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2405 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2406 /* init array */ 2407 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2408 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2409 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2410 if (local_dofs_mult) { 2411 for (i=0;i<n_local_dofs;i++) { 2412 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2413 } 2414 } else { 2415 for (i=0;i<n_local_dofs;i++) { 2416 array[local_dofs[i]]=1.0; 2417 } 2418 } 2419 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2420 /* scatter into global vec and get total number of global dofs */ 2421 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2422 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2423 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2424 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2425 /* Fill global_vec with cumulative function for global numbering */ 2426 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2427 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2428 nlocals = 0; 2429 first_index = -1; 2430 first_found = PETSC_FALSE; 2431 for (i=0;i<s;i++) { 2432 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2433 first_found = PETSC_TRUE; 2434 first_index = i; 2435 } 2436 nlocals += (PetscInt)PetscRealPart(array[i]); 2437 } 2438 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2439 if (!rank_prec_comm) { 2440 dof_displs[0]=0; 2441 for (i=1;i<size_prec_comm;i++) { 2442 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2443 } 2444 } 2445 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2446 if (first_found) { 2447 array[first_index] += (PetscScalar)nlocals; 2448 old_index = first_index; 2449 for (i=first_index+1;i<s;i++) { 2450 if (PetscRealPart(array[i]) > 0.0) { 2451 array[i] += array[old_index]; 2452 old_index = i; 2453 } 2454 } 2455 } 2456 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2457 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2458 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2459 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2460 /* get global ordering of local dofs */ 2461 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2462 if (local_dofs_mult) { 2463 for (i=0;i<n_local_dofs;i++) { 2464 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2465 } 2466 } else { 2467 for (i=0;i<n_local_dofs;i++) { 2468 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2469 } 2470 } 2471 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2472 /* free workspace */ 2473 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2474 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2475 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2476 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2477 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2478 /* return pointer to global ordering of local dofs */ 2479 *global_numbering_subset = temp_global_dofs; 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2485 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2486 { 2487 PetscInt i,j; 2488 PetscScalar *alphas; 2489 PetscErrorCode ierr; 2490 2491 PetscFunctionBegin; 2492 /* this implements stabilized Gram-Schmidt */ 2493 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2494 for (i=0;i<n;i++) { 2495 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2496 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2497 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2498 } 2499 ierr = PetscFree(alphas);CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 /* TODO 2504 - now preallocation is done assuming SEQDENSE local matrices 2505 */ 2506 #undef __FUNCT__ 2507 #define __FUNCT__ "MatISGetMPIXAIJ" 2508 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2509 { 2510 Mat new_mat; 2511 Mat_IS *matis = (Mat_IS*)(mat->data); 2512 /* info on mat */ 2513 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2514 PetscInt bs,rows,cols; 2515 PetscInt lrows,lcols; 2516 PetscInt local_rows,local_cols; 2517 PetscBool isdense; 2518 /* values insertion */ 2519 PetscScalar *array; 2520 PetscInt *local_indices,*global_indices; 2521 /* work */ 2522 PetscInt i,j,index_row; 2523 PetscErrorCode ierr; 2524 2525 PetscFunctionBegin; 2526 /* MISSING CHECKS 2527 - rectangular case not covered (it is not allowed by MATIS) 2528 */ 2529 /* get info from mat */ 2530 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2531 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2532 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2533 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2534 2535 /* work */ 2536 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2537 for (i=0;i<local_rows;i++) local_indices[i]=i; 2538 /* map indices of local mat to global values */ 2539 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2540 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2541 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2542 2543 if (reuse==MAT_INITIAL_MATRIX) { 2544 Vec vec_dnz,vec_onz; 2545 PetscScalar *my_dnz,*my_onz; 2546 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2547 PetscInt index_col,owner; 2548 PetscMPIInt nsubdomains; 2549 2550 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2551 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2552 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2553 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2554 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2555 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2556 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2557 2558 /* 2559 preallocation 2560 */ 2561 2562 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2563 /* 2564 Some vectors are needed to sum up properly on shared interface dofs. 2565 Preallocation macros cannot do the job. 2566 Note that preallocation is not exact, since it overestimates nonzeros 2567 */ 2568 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2569 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2570 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2571 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2572 /* All processes need to compute entire row ownership */ 2573 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2574 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2575 for (i=0;i<nsubdomains;i++) { 2576 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2577 row_ownership[j]=i; 2578 } 2579 } 2580 2581 /* 2582 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2583 then, they will be summed up properly. This way, preallocation is always sufficient 2584 */ 2585 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2586 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2587 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2588 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2589 for (i=0;i<local_rows;i++) { 2590 index_row = global_indices[i]; 2591 for (j=i;j<local_rows;j++) { 2592 owner = row_ownership[index_row]; 2593 index_col = global_indices[j]; 2594 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2595 my_dnz[i] += 1.0; 2596 } else { /* offdiag block */ 2597 my_onz[i] += 1.0; 2598 } 2599 /* same as before, interchanging rows and cols */ 2600 if (i != j) { 2601 owner = row_ownership[index_col]; 2602 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2603 my_dnz[j] += 1.0; 2604 } else { 2605 my_onz[j] += 1.0; 2606 } 2607 } 2608 } 2609 } 2610 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2611 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2612 if (local_rows) { /* multilevel guard */ 2613 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2614 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2615 } 2616 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2617 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2618 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2619 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2620 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2621 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2622 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2623 2624 /* set computed preallocation in dnz and onz */ 2625 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2626 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2627 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2628 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2629 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2630 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2631 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2632 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2633 2634 /* Resize preallocation if overestimated */ 2635 for (i=0;i<lrows;i++) { 2636 dnz[i] = PetscMin(dnz[i],lcols); 2637 onz[i] = PetscMin(onz[i],cols-lcols); 2638 } 2639 /* set preallocation */ 2640 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2641 for (i=0;i<lrows/bs;i++) { 2642 dnz[i] = dnz[i*bs]/bs; 2643 onz[i] = onz[i*bs]/bs; 2644 } 2645 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2646 for (i=0;i<lrows/bs;i++) { 2647 dnz[i] = dnz[i]-i; 2648 } 2649 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2650 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2651 *M = new_mat; 2652 } else { 2653 PetscInt mbs,mrows,mcols; 2654 /* some checks */ 2655 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2656 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2657 if (mrows != rows) { 2658 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2659 } 2660 if (mrows != rows) { 2661 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2662 } 2663 if (mbs != bs) { 2664 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2665 } 2666 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2667 } 2668 /* set local to global mappings */ 2669 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2670 /* Set values */ 2671 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2672 if (isdense) { /* special case for dense local matrices */ 2673 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2674 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2675 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2676 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2677 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2678 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2679 } else { /* very basic values insertion for all other matrix types */ 2680 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2681 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2682 for (i=0;i<local_rows;i++) { 2683 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2684 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2685 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2686 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2687 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2688 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2689 } 2690 } 2691 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2692 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2693 if (isdense) { 2694 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2695 } 2696 PetscFunctionReturn(0); 2697 } 2698 2699 #undef __FUNCT__ 2700 #define __FUNCT__ "MatISSubassemble_Private" 2701 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2702 { 2703 Mat subdomain_adj; 2704 IS new_ranks,ranks_send_to; 2705 MatPartitioning partitioner; 2706 Mat_IS *matis; 2707 PetscInt n_neighs,*neighs,*n_shared,**shared; 2708 PetscInt prank; 2709 PetscMPIInt size,rank,color; 2710 PetscInt *xadj,*adjncy,*oldranks; 2711 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2712 PetscInt i,j,n_subdomains,local_size,threshold=0; 2713 PetscErrorCode ierr; 2714 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2715 PetscSubcomm subcomm; 2716 2717 PetscFunctionBegin; 2718 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2719 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2720 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2721 2722 /* Get info on mapping */ 2723 matis = (Mat_IS*)(mat->data); 2724 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2725 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2726 2727 /* build local CSR graph of subdomains' connectivity */ 2728 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2729 xadj[0] = 0; 2730 xadj[1] = PetscMax(n_neighs-1,0); 2731 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2732 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2733 2734 if (threshold) { 2735 PetscInt* count,min_threshold; 2736 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2737 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2738 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2739 for (j=0;j<n_shared[i];j++) { 2740 count[shared[i][j]] += 1; 2741 } 2742 } 2743 /* adapt threshold since we dont want to lose significant connections */ 2744 min_threshold = n_neighs; 2745 for (i=1;i<n_neighs;i++) { 2746 for (j=0;j<n_shared[i];j++) { 2747 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2748 } 2749 } 2750 threshold = PetscMax(min_threshold+1,threshold); 2751 xadj[1] = 0; 2752 for (i=1;i<n_neighs;i++) { 2753 for (j=0;j<n_shared[i];j++) { 2754 if (count[shared[i][j]] < threshold) { 2755 adjncy[xadj[1]] = neighs[i]; 2756 adjncy_wgt[xadj[1]] = n_shared[i]; 2757 xadj[1]++; 2758 break; 2759 } 2760 } 2761 } 2762 ierr = PetscFree(count);CHKERRQ(ierr); 2763 } else { 2764 if (xadj[1]) { 2765 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2766 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2767 } 2768 } 2769 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2770 if (use_square) { 2771 for (i=0;i<xadj[1];i++) { 2772 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2773 } 2774 } 2775 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2776 2777 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2778 2779 /* 2780 Restrict work on active processes only. 2781 */ 2782 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2783 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2784 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2785 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2786 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2787 if (color) { 2788 ierr = PetscFree(xadj);CHKERRQ(ierr); 2789 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2790 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2791 } else { 2792 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2793 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2794 prank = rank; 2795 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2796 for (i=0;i<size;i++) { 2797 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2798 } 2799 for (i=0;i<xadj[1];i++) { 2800 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2801 } 2802 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2803 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2804 n_subdomains = (PetscInt)size; 2805 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2806 2807 /* Partition */ 2808 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2809 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2810 if (use_vwgt) { 2811 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2812 v_wgt[0] = local_size; 2813 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2814 } 2815 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2816 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2817 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2818 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2819 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2820 2821 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2822 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2823 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2824 /* clean up */ 2825 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2826 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2827 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2828 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2829 } 2830 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2831 2832 /* assemble parallel IS for sends */ 2833 i = 1; 2834 if (color) i=0; 2835 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2836 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2837 2838 /* get back IS */ 2839 *is_sends = ranks_send_to; 2840 PetscFunctionReturn(0); 2841 } 2842 2843 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2844 2845 #undef __FUNCT__ 2846 #define __FUNCT__ "MatISSubassemble" 2847 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2848 { 2849 Mat local_mat,new_mat; 2850 Mat_IS *matis; 2851 IS is_sends_internal; 2852 PetscInt rows,cols; 2853 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2854 PetscBool ismatis,isdense; 2855 ISLocalToGlobalMapping l2gmap; 2856 PetscInt* l2gmap_indices; 2857 const PetscInt* is_indices; 2858 MatType new_local_type; 2859 MatTypePrivate new_local_type_private; 2860 /* buffers */ 2861 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2862 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2863 /* MPI */ 2864 MPI_Comm comm; 2865 PetscMPIInt n_sends,n_recvs,commsize; 2866 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2867 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2868 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2869 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2870 PetscErrorCode ierr; 2871 2872 PetscFunctionBegin; 2873 /* checks */ 2874 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2875 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2876 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2877 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2878 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2879 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2880 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2881 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2882 PetscValidLogicalCollectiveInt(mat,bs,0); 2883 /* prepare IS for sending if not provided */ 2884 if (!is_sends) { 2885 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2886 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2887 } else { 2888 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2889 is_sends_internal = is_sends; 2890 } 2891 2892 /* get pointer of MATIS data */ 2893 matis = (Mat_IS*)mat->data; 2894 2895 /* get comm */ 2896 comm = PetscObjectComm((PetscObject)mat); 2897 2898 /* compute number of sends */ 2899 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2900 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2901 2902 /* compute number of receives */ 2903 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2904 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2905 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2906 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2907 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2908 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2909 ierr = PetscFree(iflags);CHKERRQ(ierr); 2910 2911 /* prepare send/receive buffers */ 2912 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2913 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2914 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2915 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2916 2917 /* Get data from local mat */ 2918 if (!isdense) { 2919 /* TODO: See below some guidelines on how to prepare the local buffers */ 2920 /* 2921 send_buffer_vals should contain the raw values of the local matrix 2922 send_buffer_idxs should contain: 2923 - MatType_PRIVATE type 2924 - PetscInt size_of_l2gmap 2925 - PetscInt global_row_indices[size_of_l2gmap] 2926 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2927 */ 2928 } else { 2929 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2930 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2931 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2932 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2933 send_buffer_idxs[1] = i; 2934 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2935 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2936 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2937 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2938 for (i=0;i<n_sends;i++) { 2939 ilengths_vals[is_indices[i]] = len*len; 2940 ilengths_idxs[is_indices[i]] = len+2; 2941 } 2942 } 2943 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2944 buf_size_idxs = 0; 2945 buf_size_vals = 0; 2946 for (i=0;i<n_recvs;i++) { 2947 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2948 buf_size_vals += (PetscInt)olengths_vals[i]; 2949 } 2950 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2951 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2952 2953 /* get new tags for clean communications */ 2954 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2955 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2956 2957 /* allocate for requests */ 2958 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2959 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2960 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2961 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2962 2963 /* communications */ 2964 ptr_idxs = recv_buffer_idxs; 2965 ptr_vals = recv_buffer_vals; 2966 for (i=0;i<n_recvs;i++) { 2967 source_dest = onodes[i]; 2968 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2969 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2970 ptr_idxs += olengths_idxs[i]; 2971 ptr_vals += olengths_vals[i]; 2972 } 2973 for (i=0;i<n_sends;i++) { 2974 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2975 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2976 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2977 } 2978 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2979 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2980 2981 /* assemble new l2g map */ 2982 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2983 ptr_idxs = recv_buffer_idxs; 2984 buf_size_idxs = 0; 2985 for (i=0;i<n_recvs;i++) { 2986 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2987 ptr_idxs += olengths_idxs[i]; 2988 } 2989 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2990 ptr_idxs = recv_buffer_idxs; 2991 buf_size_idxs = 0; 2992 for (i=0;i<n_recvs;i++) { 2993 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2994 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2995 ptr_idxs += olengths_idxs[i]; 2996 } 2997 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2998 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2999 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3000 3001 /* infer new local matrix type from received local matrices type */ 3002 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3003 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3004 new_local_type_private = MATAIJ_PRIVATE; 3005 new_local_type = MATSEQAIJ; 3006 if (n_recvs) { 3007 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3008 ptr_idxs = recv_buffer_idxs; 3009 for (i=0;i<n_recvs;i++) { 3010 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3011 new_local_type_private = MATAIJ_PRIVATE; 3012 break; 3013 } 3014 ptr_idxs += olengths_idxs[i]; 3015 } 3016 switch (new_local_type_private) { 3017 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 3018 new_local_type = MATSEQAIJ; 3019 bs = 1; 3020 break; 3021 case MATAIJ_PRIVATE: 3022 new_local_type = MATSEQAIJ; 3023 bs = 1; 3024 break; 3025 case MATBAIJ_PRIVATE: 3026 new_local_type = MATSEQBAIJ; 3027 break; 3028 case MATSBAIJ_PRIVATE: 3029 new_local_type = MATSEQSBAIJ; 3030 break; 3031 default: 3032 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3033 break; 3034 } 3035 } 3036 3037 /* create MATIS object */ 3038 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3039 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 3040 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3041 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 3042 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3043 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3044 3045 /* set values */ 3046 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3047 ptr_vals = recv_buffer_vals; 3048 ptr_idxs = recv_buffer_idxs; 3049 for (i=0;i<n_recvs;i++) { 3050 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3051 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3052 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3053 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3054 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3055 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3056 } 3057 ptr_idxs += olengths_idxs[i]; 3058 ptr_vals += olengths_vals[i]; 3059 } 3060 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3062 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3063 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3064 3065 { /* check */ 3066 Vec lvec,rvec; 3067 PetscReal infty_error; 3068 3069 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3070 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3071 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3072 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3073 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 3074 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3075 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3076 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3077 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3078 } 3079 3080 /* free workspace */ 3081 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3082 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3083 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3084 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3085 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3086 if (isdense) { 3087 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3088 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3089 } else { 3090 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3091 } 3092 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3093 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3094 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3095 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3096 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3097 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3098 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3099 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3100 ierr = PetscFree(onodes);CHKERRQ(ierr); 3101 /* get back new mat */ 3102 *mat_n = new_mat; 3103 PetscFunctionReturn(0); 3104 } 3105 3106 #undef __FUNCT__ 3107 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3108 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3109 { 3110 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3111 PC_IS *pcis = (PC_IS*)pc->data; 3112 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3113 MatNullSpace CoarseNullSpace=NULL; 3114 ISLocalToGlobalMapping coarse_islg; 3115 IS coarse_is; 3116 PetscInt max_it; 3117 PetscInt im_active=-1,active_procs=-1; 3118 PC pc_temp; 3119 PCType coarse_pc_type; 3120 KSPType coarse_ksp_type; 3121 PetscBool multilevel_requested,multilevel_allowed; 3122 PetscBool setsym,issym,isbddc,isnn,coarse_reuse; 3123 MatStructure matstruct; 3124 PetscErrorCode ierr; 3125 3126 PetscFunctionBegin; 3127 /* Assign global numbering to coarse dofs */ 3128 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3129 PetscInt ocoarse_size; 3130 ocoarse_size = pcbddc->coarse_size; 3131 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3132 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3133 /* see if we can avoid some work */ 3134 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3135 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3136 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3137 coarse_reuse = PETSC_FALSE; 3138 } else { /* we can safely reuse already computed coarse matrix */ 3139 coarse_reuse = PETSC_TRUE; 3140 } 3141 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3142 coarse_reuse = PETSC_FALSE; 3143 } 3144 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3145 coarse_reuse = PETSC_TRUE; 3146 } 3147 3148 /* infer some info from user */ 3149 issym = PETSC_FALSE; 3150 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3151 multilevel_allowed = PETSC_FALSE; 3152 multilevel_requested = PETSC_FALSE; 3153 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3154 if (multilevel_requested) { 3155 /* count "active processes" */ 3156 im_active = !!(pcis->n); 3157 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3158 if (active_procs/pcbddc->coarsening_ratio < 2) { 3159 multilevel_allowed = PETSC_FALSE; 3160 } else { 3161 multilevel_allowed = PETSC_TRUE; 3162 } 3163 } 3164 3165 /* set defaults for coarse KSP and PC */ 3166 if (multilevel_allowed) { 3167 if (issym) { 3168 coarse_ksp_type = KSPRICHARDSON; 3169 } else { 3170 coarse_ksp_type = KSPCHEBYSHEV; 3171 } 3172 coarse_pc_type = PCBDDC; 3173 } else { 3174 coarse_ksp_type = KSPPREONLY; 3175 coarse_pc_type = PCREDUNDANT; 3176 } 3177 3178 /* create the coarse KSP object only once with defaults */ 3179 if (!pcbddc->coarse_ksp) { 3180 char prefix[256],str_level[3]; 3181 size_t len; 3182 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3183 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3184 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3185 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3186 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3187 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3188 /* prefix */ 3189 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3190 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3191 if (!pcbddc->current_level) { 3192 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3193 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3194 } else { 3195 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3196 if (pcbddc->current_level>1) len -= 2; 3197 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3198 *(prefix+len)='\0'; 3199 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3200 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3201 } 3202 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3203 } 3204 /* allow user customization */ 3205 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3206 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3207 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3208 3209 /* get some info after set from options */ 3210 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3211 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3212 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3213 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3214 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3215 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3216 isbddc = PETSC_FALSE; 3217 } 3218 3219 /* propagate BDDC info to the next level */ 3220 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3221 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3222 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3223 3224 /* creates temporary MATIS object for coarse matrix */ 3225 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3226 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3227 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3228 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3229 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3230 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3231 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3232 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3233 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3234 3235 /* assemble coarse matrix */ 3236 if (isbddc || isnn) { 3237 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3238 } else { 3239 if (coarse_reuse) { 3240 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3241 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3242 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3243 } else { 3244 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3245 } 3246 } 3247 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3248 3249 /* create local to global scatters for coarse problem */ 3250 if (pcbddc->new_primal_space) { 3251 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3252 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3253 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3254 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3255 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3256 } 3257 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3258 3259 /* propagate symmetry info to coarse matrix */ 3260 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3261 3262 /* Compute coarse null space (special handling by BDDC only) */ 3263 if (pcbddc->NullSpace) { 3264 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3265 if (isbddc) { 3266 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3267 } else { 3268 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3269 } 3270 } 3271 3272 /* set operators */ 3273 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3274 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3275 3276 /* additional KSP customization */ 3277 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3278 if (max_it < 5) { 3279 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3280 } 3281 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3282 3283 3284 /* print some info if requested */ 3285 if (pcbddc->dbg_flag) { 3286 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3287 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3288 if (!multilevel_allowed) { 3289 if (multilevel_requested) { 3290 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3291 } else if (pcbddc->max_levels) { 3292 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3293 } 3294 } 3295 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3296 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3297 } 3298 3299 /* setup coarse ksp */ 3300 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3301 if (pcbddc->dbg_flag) { 3302 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3303 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3304 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3305 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3306 } 3307 3308 /* Check coarse problem if requested */ 3309 if (pcbddc->dbg_flag) { 3310 KSP check_ksp; 3311 KSPType check_ksp_type; 3312 PC check_pc; 3313 Vec check_vec; 3314 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3315 PetscInt its; 3316 PetscBool ispreonly,compute; 3317 3318 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3319 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3320 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3321 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3322 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3323 if (ispreonly) { 3324 check_ksp_type = KSPPREONLY; 3325 compute = PETSC_FALSE; 3326 } else { 3327 if (issym) check_ksp_type = KSPCG; 3328 else check_ksp_type = KSPGMRES; 3329 compute = PETSC_TRUE; 3330 } 3331 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3332 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3333 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3334 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3335 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3336 /* create random vec */ 3337 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3338 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3339 if (CoarseNullSpace) { 3340 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3341 } 3342 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3343 /* solve coarse problem */ 3344 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3345 if (CoarseNullSpace) { 3346 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3347 } 3348 /* check coarse problem residual error */ 3349 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3350 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3351 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3352 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3353 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3354 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3355 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3356 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3357 /* get eigenvalue estimation if preonly has not been requested */ 3358 if (!ispreonly) { 3359 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3360 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3361 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3362 } 3363 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3364 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3365 } 3366 /* free memory */ 3367 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3368 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3369 PetscFunctionReturn(0); 3370 } 3371 3372 #undef __FUNCT__ 3373 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3374 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3375 { 3376 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3377 PC_IS* pcis = (PC_IS*)pc->data; 3378 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3379 PetscInt i,coarse_size; 3380 PetscInt *local_primal_indices; 3381 PetscErrorCode ierr; 3382 3383 PetscFunctionBegin; 3384 /* Compute global number of coarse dofs */ 3385 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3386 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3387 } 3388 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3389 3390 /* check numbering */ 3391 if (pcbddc->dbg_flag) { 3392 PetscScalar coarsesum,*array; 3393 3394 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3395 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3397 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3398 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3399 for (i=0;i<pcbddc->local_primal_size;i++) { 3400 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3401 } 3402 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3403 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3404 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3405 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3406 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3407 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3408 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3409 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3410 for (i=0;i<pcis->n;i++) { 3411 if (array[i] == 1.0) { 3412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3413 } 3414 } 3415 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3416 for (i=0;i<pcis->n;i++) { 3417 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3418 } 3419 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3420 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3421 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3422 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3423 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3424 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3425 if (pcbddc->dbg_flag > 1) { 3426 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3427 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3428 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3429 for (i=0;i<pcbddc->local_primal_size;i++) { 3430 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3431 } 3432 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3433 } 3434 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3435 } 3436 /* get back data */ 3437 *coarse_size_n = coarse_size; 3438 *local_primal_indices_n = local_primal_indices; 3439 PetscFunctionReturn(0); 3440 } 3441 3442