1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 104 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 117 { 118 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 119 PC_IS *pcis = (PC_IS*)pc->data; 120 VecType impVecType; 121 PetscInt n_constraints,n_R,old_size; 122 PetscErrorCode ierr; 123 124 PetscFunctionBegin; 125 if (!pcbddc->ConstraintMatrix) { 126 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 127 } 128 /* get sizes */ 129 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 130 n_R = pcis->n-pcbddc->n_actual_vertices; 131 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 132 /* local work vectors (try to avoid unneeded work)*/ 133 /* R nodes */ 134 old_size = -1; 135 if (pcbddc->vec1_R) { 136 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 137 } 138 if (n_R != old_size) { 139 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 140 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 141 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 143 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 144 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 145 } 146 /* local primal dofs */ 147 old_size = -1; 148 if (pcbddc->vec1_P) { 149 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 150 } 151 if (pcbddc->local_primal_size != old_size) { 152 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 153 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 155 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 156 } 157 /* local explicit constraints */ 158 old_size = -1; 159 if (pcbddc->vec1_C) { 160 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 161 } 162 if (n_constraints && n_constraints != old_size) { 163 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 164 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 166 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNCT__ 172 #define __FUNCT__ "PCBDDCSetUpCorrection" 173 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 174 { 175 PetscErrorCode ierr; 176 /* pointers to pcis and pcbddc */ 177 PC_IS* pcis = (PC_IS*)pc->data; 178 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 179 /* submatrices of local problem */ 180 Mat A_RV,A_VR,A_VV; 181 /* working matrices */ 182 Mat M1,M2,M3,C_CR; 183 /* working vectors */ 184 Vec vec1_C,vec2_C,vec1_V,vec2_V; 185 /* additional working stuff */ 186 IS is_aux; 187 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 188 const PetscScalar *array,*row_cmat_values; 189 const PetscInt *row_cmat_indices,*idx_R_local; 190 PetscInt *idx_V_B,*auxindices; 191 PetscInt n_vertices,n_constraints,size_of_constraint; 192 PetscInt i,j,n_R,n_D,n_B; 193 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 194 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 195 MatType impMatType; 196 /* some shortcuts to scalars */ 197 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 198 /* for debugging purposes */ 199 PetscReal *coarsefunctions_errors,*constraints_errors; 200 201 PetscFunctionBegin; 202 /* get number of vertices (corners plus constraints with change of basis) 203 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 204 n_vertices = pcbddc->n_actual_vertices; 205 n_constraints = pcbddc->local_primal_size-n_vertices; 206 /* Set Non-overlapping dimensions */ 207 n_B = pcis->n_B; n_D = pcis->n - n_B; 208 n_R = pcis->n-n_vertices; 209 210 /* Set types for local objects needed by BDDC precondtioner */ 211 impMatType = MATSEQDENSE; 212 213 /* Allocating some extra storage just to be safe */ 214 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 215 for (i=0;i<pcis->n;i++) auxindices[i]=i; 216 217 /* vertices in boundary numbering */ 218 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 219 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 220 if (i != n_vertices) { 221 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 222 } 223 224 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 225 if (n_constraints) { 226 /* see if we can save some allocations */ 227 if (pcbddc->local_auxmat2) { 228 PetscInt on_R,on_constraints; 229 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 230 if (on_R != n_R || on_constraints != n_constraints) { 231 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 232 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 233 } 234 } 235 /* work vectors */ 236 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 238 /* auxiliary matrices */ 239 if (!pcbddc->local_auxmat2) { 240 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 241 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 242 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 243 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 244 } 245 246 /* Extract constraints on R nodes: C_{CR} */ 247 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 248 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 249 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 250 251 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 252 for (i=0;i<n_constraints;i++) { 253 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 254 /* Get row of constraint matrix in R numbering */ 255 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 256 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 257 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 259 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 260 /* Solve for row of constraint matrix in R numbering */ 261 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 262 /* Set values in local_auxmat2 */ 263 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 264 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 265 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 } 267 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 268 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 270 271 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 272 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 273 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 274 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 275 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 276 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 277 ierr = MatSetUp(M1);CHKERRQ(ierr); 278 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 279 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 280 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 281 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 282 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 283 ierr = MatDestroy(&M2);CHKERRQ(ierr); 284 ierr = MatDestroy(&M3);CHKERRQ(ierr); 285 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 286 if (!pcbddc->local_auxmat1) { 287 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 288 } else { 289 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } 291 } 292 293 /* Get submatrices from subdomain matrix */ 294 if (n_vertices) { 295 PetscInt ibs,mbs; 296 PetscBool issbaij; 297 Mat newmat; 298 299 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 300 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 301 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 302 if (ibs != mbs) { /* need to convert to SEQAIJ */ 303 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 304 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 307 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 308 } else { 309 /* this is safe */ 310 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 311 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 312 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 313 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 314 /* which of the two approaches is faster? */ 315 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 316 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 317 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 319 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 320 } else { 321 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 322 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 } 324 } 325 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 326 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 327 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 328 } 329 330 /* Matrix of coarse basis functions (local) */ 331 if (pcbddc->coarse_phi_B) { 332 PetscInt on_B,on_primal; 333 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 334 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 335 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 336 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 337 } 338 } 339 if (pcbddc->coarse_phi_D) { 340 PetscInt on_D,on_primal; 341 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 342 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 343 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 344 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 345 } 346 } 347 if (!pcbddc->coarse_phi_B) { 348 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 349 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 350 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 351 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 352 } 353 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 354 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 355 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 356 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 357 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 358 } 359 360 if (pcbddc->dbg_flag) { 361 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 362 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 363 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 364 } 365 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 366 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 367 368 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 369 370 /* vertices */ 371 for (i=0;i<n_vertices;i++) { 372 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 373 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 374 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 375 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 376 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 378 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 379 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 380 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 382 if (n_constraints) { 383 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 384 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 386 } 387 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 388 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 389 390 /* Set values in coarse basis function and subdomain part of coarse_mat */ 391 /* coarse basis functions */ 392 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 393 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 394 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 396 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 397 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 399 if (pcbddc->switch_static || pcbddc->dbg_flag) { 400 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 401 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 403 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 } 406 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 407 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 408 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 if (n_constraints) { 411 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 412 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 } 415 416 /* check */ 417 if (pcbddc->dbg_flag) { 418 /* assemble subdomain vector on local nodes */ 419 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 420 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 421 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 425 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 426 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 427 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 428 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 429 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 430 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 if (n_constraints) { 432 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 433 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 } 436 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 439 /* check saddle point solution */ 440 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 441 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 443 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 444 /* shift by the identity matrix */ 445 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 449 } 450 } 451 452 /* constraints */ 453 for (i=0;i<n_constraints;i++) { 454 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 455 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 456 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 457 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 458 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 459 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 460 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 461 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 462 if (n_vertices) { 463 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 464 } 465 /* Set values in coarse basis function and subdomain part of coarse_mat */ 466 /* coarse basis functions */ 467 j = i+n_vertices; /* don't touch this! */ 468 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 469 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 472 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 473 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 if (pcbddc->switch_static || pcbddc->dbg_flag) { 475 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 478 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 479 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 } 481 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 482 if (n_vertices) { 483 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 484 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 485 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 } 487 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 488 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 491 if (pcbddc->dbg_flag) { 492 /* assemble subdomain vector on nodes */ 493 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 494 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 495 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 499 /* assemble subdomain vector of lagrange multipliers */ 500 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 501 if (n_vertices) { 502 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 503 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 504 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 } 506 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 507 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 510 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 512 /* check saddle point solution */ 513 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 514 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 516 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 517 /* shift by the identity matrix */ 518 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 519 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 522 } 523 } 524 /* call assembling routines for local coarse basis */ 525 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 526 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 if (pcbddc->switch_static || pcbddc->dbg_flag) { 528 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 } 531 532 /* compute other basis functions for non-symmetric problems */ 533 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 534 if (!setsym || (setsym && !issym)) { 535 if (!pcbddc->coarse_psi_B) { 536 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 537 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 539 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 540 } 541 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 542 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 543 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 544 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 545 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 546 } 547 for (i=0;i<pcbddc->local_primal_size;i++) { 548 if (n_constraints) { 549 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 550 for (j=0;j<n_constraints;j++) { 551 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 552 } 553 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 554 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 555 } 556 if (i<n_vertices) { 557 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 558 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 559 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 560 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 561 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 562 if (n_constraints) { 563 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 564 } 565 } else { 566 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 567 } 568 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 569 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 570 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 573 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 574 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 if (i<n_vertices) { 576 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 577 } 578 if (pcbddc->switch_static || pcbddc->dbg_flag) { 579 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 580 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 582 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 583 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 } 585 586 if (pcbddc->dbg_flag) { 587 /* assemble subdomain vector on nodes */ 588 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 589 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 590 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 591 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 if (i<n_vertices) { 593 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 594 } 595 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 596 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 597 /* assemble subdomain vector of lagrange multipliers */ 598 for (j=0;j<pcbddc->local_primal_size;j++) { 599 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 600 } 601 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 602 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 603 /* check saddle point solution */ 604 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 605 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 607 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 608 /* shift by the identity matrix */ 609 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 610 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 611 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 613 } 614 } 615 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 616 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 if (pcbddc->switch_static || pcbddc->dbg_flag) { 618 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 } 621 } 622 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 623 /* Checking coarse_sub_mat and coarse basis functios */ 624 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 625 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 626 if (pcbddc->dbg_flag) { 627 Mat coarse_sub_mat; 628 Mat AUXMAT,TM1,TM2,TM3,TM4; 629 Mat coarse_phi_D,coarse_phi_B; 630 Mat coarse_psi_D,coarse_psi_B; 631 Mat A_II,A_BB,A_IB,A_BI; 632 MatType checkmattype=MATSEQAIJ; 633 PetscReal real_value; 634 635 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 636 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 637 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 638 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 639 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 640 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 641 if (pcbddc->coarse_psi_B) { 642 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 643 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 644 } 645 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 646 647 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 648 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 649 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 650 if (pcbddc->coarse_psi_B) { 651 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 652 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 653 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 654 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 655 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 656 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 657 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 658 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 659 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 660 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 661 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 662 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 663 } else { 664 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 665 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 666 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 667 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 668 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 669 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 670 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 672 } 673 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 674 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 675 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 676 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 677 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 678 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 679 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 680 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 684 for (i=0;i<pcbddc->local_primal_size;i++) { 685 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 686 } 687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 688 for (i=0;i<pcbddc->local_primal_size;i++) { 689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 690 } 691 if (pcbddc->coarse_psi_B) { 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 693 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 695 } 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 697 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 699 } 700 } 701 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 702 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 703 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 704 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 705 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 706 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 707 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 708 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 709 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 710 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 711 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 712 if (pcbddc->coarse_psi_B) { 713 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 714 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 715 } 716 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 717 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 718 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 719 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 720 } 721 /* free memory */ 722 if (n_vertices) { 723 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 724 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 725 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 726 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 727 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 728 } 729 if (n_constraints) { 730 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 731 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 732 ierr = MatDestroy(&M1);CHKERRQ(ierr); 733 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 734 } 735 ierr = PetscFree(auxindices);CHKERRQ(ierr); 736 /* get back data */ 737 *coarse_submat_vals_n = coarse_submat_vals; 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 743 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 744 { 745 PC_IS* pcis = (PC_IS*)(pc->data); 746 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 747 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 748 PetscBool issbaij,isbaij; 749 /* manage repeated solves */ 750 MatReuse reuse; 751 MatStructure matstruct; 752 PetscErrorCode ierr; 753 754 PetscFunctionBegin; 755 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 756 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 757 } 758 /* get mat flags */ 759 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 760 reuse = MAT_INITIAL_MATRIX; 761 if (pc->setupcalled) { 762 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 763 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 764 if (matstruct == SAME_NONZERO_PATTERN) { 765 reuse = MAT_REUSE_MATRIX; 766 } else { 767 reuse = MAT_INITIAL_MATRIX; 768 } 769 } 770 if (reuse == MAT_INITIAL_MATRIX) { 771 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 772 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 773 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 774 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 775 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 776 } 777 778 /* transform local matrices if needed */ 779 if (pcbddc->use_change_of_basis) { 780 Mat change_mat_all; 781 PetscScalar *row_cmat_values; 782 PetscInt *row_cmat_indices; 783 PetscInt *nnz,*is_indices,*temp_indices; 784 PetscInt i,j,k,n_D,n_B; 785 786 /* Get Non-overlapping dimensions */ 787 n_B = pcis->n_B; 788 n_D = pcis->n-n_B; 789 790 /* compute nonzero structure of change of basis on all local nodes */ 791 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 792 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 793 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 794 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 795 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 796 k=1; 797 for (i=0;i<n_B;i++) { 798 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 799 nnz[is_indices[i]]=j; 800 if (k < j) k = j; 801 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 802 } 803 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 804 /* assemble change of basis matrix on the whole set of local dofs */ 805 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 806 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 807 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 808 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 809 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 810 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 811 for (i=0;i<n_D;i++) { 812 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 813 } 814 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 815 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 816 for (i=0;i<n_B;i++) { 817 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 818 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 819 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 820 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 821 } 822 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 824 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 825 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 826 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 827 if (!issbaij && !isbaij) { 828 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 829 } else { 830 Mat work_mat; 831 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 832 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 833 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 834 } 835 /* 836 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 837 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 838 */ 839 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 840 ierr = PetscFree(nnz);CHKERRQ(ierr); 841 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 842 } else { 843 /* without change of basis, the local matrix is unchanged */ 844 if (!pcbddc->local_mat) { 845 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 846 pcbddc->local_mat = matis->A; 847 } 848 } 849 850 /* get submatrices */ 851 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 852 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 853 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 854 if (!issbaij) { 855 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 856 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 857 } else { 858 Mat newmat; 859 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 860 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 861 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 862 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 863 } 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 869 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 870 { 871 PC_IS* pcis = (PC_IS*)(pc->data); 872 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 873 IS is_aux1,is_aux2; 874 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 875 PetscInt n_vertices,i,j,n_R,n_D,n_B; 876 PetscInt vbs,bs; 877 PetscBT bitmask; 878 PetscErrorCode ierr; 879 880 PetscFunctionBegin; 881 /* No need to setup local scatters if primal space is unchanged */ 882 if (!pcbddc->new_primal_space_local && !pcbddc->dbg_flag) { 883 PetscFunctionReturn(0); 884 } 885 /* destroy old objects */ 886 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 887 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 888 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 889 /* Set Non-overlapping dimensions */ 890 n_B = pcis->n_B; n_D = pcis->n - n_B; 891 n_vertices = pcbddc->n_actual_vertices; 892 /* create auxiliary bitmask */ 893 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 894 for (i=0;i<n_vertices;i++) { 895 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 896 } 897 898 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 899 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 900 for (i=0, n_R=0; i<pcis->n; i++) { 901 if (!PetscBTLookup(bitmask,i)) { 902 idx_R_local[n_R] = i; 903 n_R++; 904 } 905 } 906 907 /* Block code */ 908 vbs = 1; 909 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 910 if (bs>1 && !(n_vertices%bs)) { 911 PetscBool is_blocked = PETSC_TRUE; 912 PetscInt *vary; 913 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 914 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 915 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 916 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 917 for (i=0; i<n_vertices; i++) { 918 if (vary[i]!=0 && vary[i]!=bs) { 919 is_blocked = PETSC_FALSE; 920 break; 921 } 922 } 923 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 924 vbs = bs; 925 for (i=0;i<n_R/vbs;i++) { 926 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 927 } 928 } 929 ierr = PetscFree(vary);CHKERRQ(ierr); 930 } 931 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 932 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 933 934 /* print some info if requested */ 935 if (pcbddc->dbg_flag) { 936 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 938 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 939 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 940 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 941 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 942 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 944 } 945 946 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 947 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 948 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 949 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 950 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 951 for (i=0; i<n_D; i++) { 952 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 953 } 954 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 955 for (i=0, j=0; i<n_R; i++) { 956 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 957 aux_array1[j++] = i; 958 } 959 } 960 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 961 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 962 for (i=0, j=0; i<n_B; i++) { 963 if (!PetscBTLookup(bitmask,is_indices[i])) { 964 aux_array2[j++] = i; 965 } 966 } 967 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 968 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 969 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 970 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 971 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 972 973 if (pcbddc->switch_static || pcbddc->dbg_flag) { 974 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 975 for (i=0, j=0; i<n_R; i++) { 976 if (PetscBTLookup(bitmask,idx_R_local[i])) { 977 aux_array1[j++] = i; 978 } 979 } 980 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 981 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 982 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 983 } 984 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 985 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 992 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 993 { 994 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 995 PC_IS *pcis = (PC_IS*)pc->data; 996 PC pc_temp; 997 Mat A_RR; 998 MatStructure matstruct; 999 MatReuse reuse; 1000 PetscScalar m_one = -1.0; 1001 PetscReal value; 1002 PetscInt n_D,n_R,ibs,mbs; 1003 PetscBool use_exact,use_exact_reduced,issbaij; 1004 PetscErrorCode ierr; 1005 /* prefixes stuff */ 1006 char dir_prefix[256],neu_prefix[256],str_level[3]; 1007 size_t len; 1008 1009 PetscFunctionBegin; 1010 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1011 1012 /* compute prefixes */ 1013 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1014 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1015 if (!pcbddc->current_level) { 1016 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1017 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1018 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1019 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1020 } else { 1021 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1022 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1023 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1024 len -= 15; /* remove "pc_bddc_coarse_" */ 1025 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1026 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1027 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1028 *(dir_prefix+len)='\0'; 1029 *(neu_prefix+len)='\0'; 1030 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1031 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1032 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1033 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1034 } 1035 1036 /* DIRICHLET PROBLEM */ 1037 /* Matrix for Dirichlet problem is pcis->A_II */ 1038 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1039 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1040 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1041 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1042 /* default */ 1043 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1044 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1045 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1046 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1047 if (issbaij) { 1048 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1049 } else { 1050 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1051 } 1052 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1053 } 1054 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1055 /* Allow user's customization */ 1056 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1057 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1058 if (!n_D) { 1059 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1060 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1061 } 1062 /* Set Up KSP for Dirichlet problem of BDDC */ 1063 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1064 /* set ksp_D into pcis data */ 1065 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1066 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1067 pcis->ksp_D = pcbddc->ksp_D; 1068 1069 /* NEUMANN PROBLEM */ 1070 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1071 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1072 if (pcbddc->ksp_R) { /* already created ksp */ 1073 PetscInt nn_R; 1074 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1075 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1076 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1077 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1078 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1079 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1080 reuse = MAT_INITIAL_MATRIX; 1081 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1082 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1083 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1084 reuse = MAT_INITIAL_MATRIX; 1085 } else { /* safe to reuse the matrix */ 1086 reuse = MAT_REUSE_MATRIX; 1087 } 1088 } 1089 /* last check */ 1090 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1091 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1092 reuse = MAT_INITIAL_MATRIX; 1093 } 1094 } else { /* first time, so we need to create the matrix */ 1095 reuse = MAT_INITIAL_MATRIX; 1096 } 1097 /* extract A_RR */ 1098 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1099 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1100 if (ibs != mbs) { 1101 Mat newmat; 1102 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1103 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1104 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1105 } else { 1106 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1107 } 1108 if (!pcbddc->ksp_R) { /* create object if not present */ 1109 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1110 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1111 /* default */ 1112 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1113 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1114 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1115 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1116 if (issbaij) { 1117 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1118 } else { 1119 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1120 } 1121 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1122 } 1123 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1124 /* Allow user's customization */ 1125 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1126 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1127 if (!n_R) { 1128 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1129 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1130 } 1131 /* Set Up KSP for Neumann problem of BDDC */ 1132 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1133 1134 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1135 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1136 /* Dirichlet */ 1137 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1138 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1139 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1140 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1141 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1142 /* need to be adapted? */ 1143 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1144 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1145 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1146 /* print info */ 1147 if (pcbddc->dbg_flag) { 1148 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1149 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1150 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1152 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1153 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1154 } 1155 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1156 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1157 } 1158 1159 /* Neumann */ 1160 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1161 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1162 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1163 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1164 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1165 /* need to be adapted? */ 1166 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1167 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1168 /* print info */ 1169 if (pcbddc->dbg_flag) { 1170 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1171 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1172 } 1173 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1174 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1175 } 1176 } 1177 /* free Neumann problem's matrix */ 1178 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1184 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1185 { 1186 PetscErrorCode ierr; 1187 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1188 1189 PetscFunctionBegin; 1190 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1191 if (pcbddc->local_auxmat1) { 1192 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1193 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1194 } 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1200 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1201 { 1202 PetscErrorCode ierr; 1203 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1204 PC_IS* pcis = (PC_IS*) (pc->data); 1205 const PetscScalar zero = 0.0; 1206 1207 PetscFunctionBegin; 1208 /* Application of PHI^T (or PSI^T) */ 1209 if (pcbddc->coarse_psi_B) { 1210 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1211 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1212 } else { 1213 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1214 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1215 } 1216 /* Scatter data of coarse_rhs */ 1217 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1218 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1219 1220 /* Local solution on R nodes */ 1221 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1222 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1223 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 if (pcbddc->switch_static) { 1225 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1226 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1227 } 1228 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1229 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1230 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1231 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 if (pcbddc->switch_static) { 1233 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1235 } 1236 1237 /* Coarse solution */ 1238 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1240 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1241 } 1242 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1243 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1244 1245 /* Sum contributions from two levels */ 1246 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1247 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1253 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1254 { 1255 PetscErrorCode ierr; 1256 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1257 1258 PetscFunctionBegin; 1259 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1265 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1266 { 1267 PetscErrorCode ierr; 1268 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1269 1270 PetscFunctionBegin; 1271 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 /* uncomment for testing purposes */ 1276 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1279 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1280 { 1281 PetscErrorCode ierr; 1282 PC_IS* pcis = (PC_IS*)(pc->data); 1283 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1284 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1285 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1286 MatType impMatType=MATSEQAIJ; 1287 /* one and zero */ 1288 PetscScalar one=1.0,zero=0.0; 1289 /* space to store constraints and their local indices */ 1290 PetscScalar *temp_quadrature_constraint; 1291 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1292 /* iterators */ 1293 PetscInt i,j,k,total_counts,temp_start_ptr; 1294 /* stuff to store connected components stored in pcbddc->mat_graph */ 1295 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1296 PetscInt n_ISForFaces,n_ISForEdges; 1297 /* near null space stuff */ 1298 MatNullSpace nearnullsp; 1299 const Vec *nearnullvecs; 1300 Vec *localnearnullsp; 1301 PetscBool nnsp_has_cnst; 1302 PetscInt nnsp_size; 1303 PetscScalar *array; 1304 /* BLAS integers */ 1305 PetscBLASInt lwork,lierr; 1306 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1307 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1308 /* LAPACK working arrays for SVD or POD */ 1309 PetscBool skip_lapack; 1310 PetscScalar *work; 1311 PetscReal *singular_vals; 1312 #if defined(PETSC_USE_COMPLEX) 1313 PetscReal *rwork; 1314 #endif 1315 #if defined(PETSC_MISSING_LAPACK_GESVD) 1316 PetscBLASInt Blas_one_2=1; 1317 PetscScalar *temp_basis,*correlation_mat; 1318 #else 1319 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1320 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1321 #endif 1322 /* reuse */ 1323 PetscInt olocal_primal_size; 1324 PetscInt *oprimal_indices_local_idxs; 1325 /* change of basis */ 1326 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1327 PetscBool boolforchange,qr_needed; 1328 PetscBT touched,change_basis,qr_needed_idx; 1329 /* auxiliary stuff */ 1330 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1331 /* some quantities */ 1332 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1333 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1334 1335 1336 PetscFunctionBegin; 1337 /* Destroy Mat objects computed previously */ 1338 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1339 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1340 /* Get index sets for faces, edges and vertices from graph */ 1341 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1342 pcbddc->use_vertices = PETSC_TRUE; 1343 } 1344 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1345 /* HACK: provide functions to set change of basis */ 1346 if (!ISForVertices && pcbddc->NullSpace) { 1347 pcbddc->use_change_of_basis = PETSC_TRUE; 1348 pcbddc->use_change_on_faces = PETSC_FALSE; 1349 } 1350 /* print some info */ 1351 if (pcbddc->dbg_flag) { 1352 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1353 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1354 i = 0; 1355 if (ISForVertices) { 1356 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1357 } 1358 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1359 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1360 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1361 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1362 } 1363 /* check if near null space is attached to global mat */ 1364 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1365 if (nearnullsp) { 1366 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1367 /* remove any stored info */ 1368 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1369 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1370 /* store information for BDDC solver reuse */ 1371 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1372 pcbddc->onearnullspace = nearnullsp; 1373 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1374 for (i=0;i<nnsp_size;i++) { 1375 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1376 } 1377 } else { /* if near null space is not provided BDDC uses constants by default */ 1378 nnsp_size = 0; 1379 nnsp_has_cnst = PETSC_TRUE; 1380 } 1381 /* get max number of constraints on a single cc */ 1382 max_constraints = nnsp_size; 1383 if (nnsp_has_cnst) max_constraints++; 1384 1385 /* 1386 Evaluate maximum storage size needed by the procedure 1387 - temp_indices will contain start index of each constraint stored as follows 1388 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1389 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1390 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1391 */ 1392 total_counts = n_ISForFaces+n_ISForEdges; 1393 total_counts *= max_constraints; 1394 n_vertices = 0; 1395 if (ISForVertices) { 1396 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1397 } 1398 total_counts += n_vertices; 1399 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1400 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1401 total_counts = 0; 1402 max_size_of_constraint = 0; 1403 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1404 if (i<n_ISForEdges) { 1405 used_IS = &ISForEdges[i]; 1406 } else { 1407 used_IS = &ISForFaces[i-n_ISForEdges]; 1408 } 1409 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1410 total_counts += j; 1411 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1412 } 1413 total_counts *= max_constraints; 1414 total_counts += n_vertices; 1415 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1416 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1417 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1418 /* get local part of global near null space vectors */ 1419 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1420 for (k=0;k<nnsp_size;k++) { 1421 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1422 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1423 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1424 } 1425 1426 /* whether or not to skip lapack calls */ 1427 skip_lapack = PETSC_TRUE; 1428 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1429 1430 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1431 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1432 PetscScalar temp_work; 1433 #if defined(PETSC_MISSING_LAPACK_GESVD) 1434 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1435 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1436 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1437 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1438 #if defined(PETSC_USE_COMPLEX) 1439 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1440 #endif 1441 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1442 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1443 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1444 lwork = -1; 1445 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1446 #if !defined(PETSC_USE_COMPLEX) 1447 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1448 #else 1449 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1450 #endif 1451 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1452 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1453 #else /* on missing GESVD */ 1454 /* SVD */ 1455 PetscInt max_n,min_n; 1456 max_n = max_size_of_constraint; 1457 min_n = max_constraints; 1458 if (max_size_of_constraint < max_constraints) { 1459 min_n = max_size_of_constraint; 1460 max_n = max_constraints; 1461 } 1462 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1463 #if defined(PETSC_USE_COMPLEX) 1464 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1465 #endif 1466 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1467 lwork = -1; 1468 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1469 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1470 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1471 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1472 #if !defined(PETSC_USE_COMPLEX) 1473 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1474 #else 1475 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1476 #endif 1477 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1478 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1479 #endif /* on missing GESVD */ 1480 /* Allocate optimal workspace */ 1481 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1482 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1483 } 1484 /* Now we can loop on constraining sets */ 1485 total_counts = 0; 1486 temp_indices[0] = 0; 1487 /* vertices */ 1488 if (ISForVertices) { 1489 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1490 if (nnsp_has_cnst) { /* consider all vertices */ 1491 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1492 for (i=0;i<n_vertices;i++) { 1493 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1494 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1495 total_counts++; 1496 } 1497 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1498 PetscBool used_vertex; 1499 for (i=0;i<n_vertices;i++) { 1500 used_vertex = PETSC_FALSE; 1501 k = 0; 1502 while (!used_vertex && k<nnsp_size) { 1503 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1504 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1505 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1506 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1507 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1508 total_counts++; 1509 used_vertex = PETSC_TRUE; 1510 } 1511 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1512 k++; 1513 } 1514 } 1515 } 1516 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1517 n_vertices = total_counts; 1518 } 1519 1520 /* edges and faces */ 1521 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1522 if (i<n_ISForEdges) { 1523 used_IS = &ISForEdges[i]; 1524 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1525 } else { 1526 used_IS = &ISForFaces[i-n_ISForEdges]; 1527 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1528 } 1529 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1530 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1531 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1532 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1533 /* change of basis should not be performed on local periodic nodes */ 1534 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1535 if (nnsp_has_cnst) { 1536 PetscScalar quad_value; 1537 temp_constraints++; 1538 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1539 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1540 for (j=0;j<size_of_constraint;j++) { 1541 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1542 } 1543 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1544 total_counts++; 1545 } 1546 for (k=0;k<nnsp_size;k++) { 1547 PetscReal real_value; 1548 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1549 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1550 for (j=0;j<size_of_constraint;j++) { 1551 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1552 } 1553 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1554 /* check if array is null on the connected component */ 1555 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1556 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1557 if (real_value > 0.0) { /* keep indices and values */ 1558 temp_constraints++; 1559 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1560 total_counts++; 1561 } 1562 } 1563 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1564 valid_constraints = temp_constraints; 1565 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1566 if (!pcbddc->use_nnsp_true && temp_constraints) { 1567 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1568 1569 #if defined(PETSC_MISSING_LAPACK_GESVD) 1570 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1571 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1572 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1573 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1574 from that computed using LAPACKgesvd 1575 -> This is due to a different computation of eigenvectors in LAPACKheev 1576 -> The quality of the POD-computed basis will be the same */ 1577 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1578 /* Store upper triangular part of correlation matrix */ 1579 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1580 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1581 for (j=0;j<temp_constraints;j++) { 1582 for (k=0;k<j+1;k++) { 1583 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1584 } 1585 } 1586 /* compute eigenvalues and eigenvectors of correlation matrix */ 1587 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1588 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1589 #if !defined(PETSC_USE_COMPLEX) 1590 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1591 #else 1592 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1593 #endif 1594 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1595 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1596 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1597 j=0; 1598 while (j < temp_constraints && singular_vals[j] < tol) j++; 1599 total_counts=total_counts-j; 1600 valid_constraints = temp_constraints-j; 1601 /* scale and copy POD basis into used quadrature memory */ 1602 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1603 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1604 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1605 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1606 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1607 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1608 if (j<temp_constraints) { 1609 PetscInt ii; 1610 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1611 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1612 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1613 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1614 for (k=0;k<temp_constraints-j;k++) { 1615 for (ii=0;ii<size_of_constraint;ii++) { 1616 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1617 } 1618 } 1619 } 1620 #else /* on missing GESVD */ 1621 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1622 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1623 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1624 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1625 #if !defined(PETSC_USE_COMPLEX) 1626 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1627 #else 1628 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1629 #endif 1630 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1631 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1632 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1633 k = temp_constraints; 1634 if (k > size_of_constraint) k = size_of_constraint; 1635 j = 0; 1636 while (j < k && singular_vals[k-j-1] < tol) j++; 1637 total_counts = total_counts-temp_constraints+k-j; 1638 valid_constraints = k-j; 1639 #endif /* on missing GESVD */ 1640 } 1641 /* setting change_of_basis flag is safe now */ 1642 if (boolforchange) { 1643 for (j=0;j<valid_constraints;j++) { 1644 PetscBTSet(change_basis,total_counts-j-1); 1645 } 1646 } 1647 } 1648 /* free index sets of faces, edges and vertices */ 1649 for (i=0;i<n_ISForFaces;i++) { 1650 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1651 } 1652 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1653 for (i=0;i<n_ISForEdges;i++) { 1654 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1655 } 1656 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1657 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1658 /* map temp_indices_to_constraint in boundary numbering */ 1659 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1660 if (i != temp_indices[total_counts]) { 1661 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1662 } 1663 1664 /* free workspace */ 1665 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1666 ierr = PetscFree(work);CHKERRQ(ierr); 1667 #if defined(PETSC_USE_COMPLEX) 1668 ierr = PetscFree(rwork);CHKERRQ(ierr); 1669 #endif 1670 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1671 #if defined(PETSC_MISSING_LAPACK_GESVD) 1672 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1673 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1674 #endif 1675 } 1676 for (k=0;k<nnsp_size;k++) { 1677 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1678 } 1679 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1680 1681 /* set quantities in pcbddc data structure and store previous primal size */ 1682 /* n_vertices defines the number of subdomain corners in the primal space */ 1683 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1684 olocal_primal_size = pcbddc->local_primal_size; 1685 pcbddc->local_primal_size = total_counts; 1686 pcbddc->n_vertices = n_vertices; 1687 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1688 1689 /* Create constraint matrix */ 1690 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1691 /* so we need to set it up properly either with or without change of basis */ 1692 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1693 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1694 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1695 /* array to compute a local numbering of constraints : vertices first then constraints */ 1696 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1697 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1698 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1699 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1700 /* auxiliary stuff for basis change */ 1701 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1702 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1703 1704 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1705 total_primal_vertices=0; 1706 for (i=0;i<pcbddc->local_primal_size;i++) { 1707 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1708 if (size_of_constraint == 1) { 1709 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1710 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1711 aux_primal_minloc[total_primal_vertices]=0; 1712 total_primal_vertices++; 1713 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1714 PetscInt min_loc,min_index; 1715 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1716 /* find first untouched local node */ 1717 k = 0; 1718 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1719 min_index = global_indices[k]; 1720 min_loc = k; 1721 /* search the minimum among global nodes already untouched on the cc */ 1722 for (k=1;k<size_of_constraint;k++) { 1723 /* there can be more than one constraint on a single connected component */ 1724 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1725 min_index = global_indices[k]; 1726 min_loc = k; 1727 } 1728 } 1729 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1730 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1731 aux_primal_minloc[total_primal_vertices]=min_loc; 1732 total_primal_vertices++; 1733 } 1734 } 1735 /* determine if a QR strategy is needed for change of basis */ 1736 qr_needed = PETSC_FALSE; 1737 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1738 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1739 if (PetscBTLookup(change_basis,i)) { 1740 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1741 j = 0; 1742 for (k=0;k<size_of_constraint;k++) { 1743 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1744 j++; 1745 } 1746 } 1747 /* found more than one primal dof on the cc */ 1748 if (j > 1) { 1749 PetscBTSet(qr_needed_idx,i); 1750 qr_needed = PETSC_TRUE; 1751 } 1752 } 1753 } 1754 /* free workspace */ 1755 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1756 1757 /* permute indices in order to have a sorted set of vertices */ 1758 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1759 1760 /* nonzero structure of constraint matrix */ 1761 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1762 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1763 j=total_primal_vertices; 1764 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1765 if (!PetscBTLookup(change_basis,i)) { 1766 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1767 j++; 1768 } 1769 } 1770 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1771 ierr = PetscFree(nnz);CHKERRQ(ierr); 1772 /* set values in constraint matrix */ 1773 for (i=0;i<total_primal_vertices;i++) { 1774 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1775 } 1776 total_counts = total_primal_vertices; 1777 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1778 if (!PetscBTLookup(change_basis,i)) { 1779 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1780 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1781 total_counts++; 1782 } 1783 } 1784 /* assembling */ 1785 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1786 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1787 /* 1788 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1789 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1790 */ 1791 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1792 if (pcbddc->use_change_of_basis) { 1793 /* dual and primal dofs on a single cc */ 1794 PetscInt dual_dofs,primal_dofs; 1795 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1796 PetscInt primal_counter; 1797 /* working stuff for GEQRF */ 1798 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1799 PetscBLASInt lqr_work; 1800 /* working stuff for UNGQR */ 1801 PetscScalar *gqr_work,lgqr_work_t; 1802 PetscBLASInt lgqr_work; 1803 /* working stuff for TRTRS */ 1804 PetscScalar *trs_rhs; 1805 PetscBLASInt Blas_NRHS; 1806 /* pointers for values insertion into change of basis matrix */ 1807 PetscInt *start_rows,*start_cols; 1808 PetscScalar *start_vals; 1809 /* working stuff for values insertion */ 1810 PetscBT is_primal; 1811 1812 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1813 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1814 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1815 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1816 /* work arrays */ 1817 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1818 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1819 /* nonzeros per row */ 1820 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1821 if (PetscBTLookup(change_basis,i)) { 1822 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1823 if (PetscBTLookup(qr_needed_idx,i)) { 1824 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1825 } else { 1826 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1827 /* get local primal index on the cc */ 1828 j = 0; 1829 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1830 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1831 } 1832 } 1833 } 1834 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1835 ierr = PetscFree(nnz);CHKERRQ(ierr); 1836 /* Set initial identity in the matrix */ 1837 for (i=0;i<pcis->n_B;i++) { 1838 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1839 } 1840 1841 if (pcbddc->dbg_flag) { 1842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1844 } 1845 1846 1847 /* Now we loop on the constraints which need a change of basis */ 1848 /* 1849 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1850 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1851 1852 Basic blocks of change of basis matrix T computed 1853 1854 - Using the following block transformation if there is only a primal dof on the cc 1855 (in the example, primal dof is the last one of the edge in LOCAL ordering 1856 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1857 | 1 0 ... 0 1 | 1858 | 0 1 ... 0 1 | 1859 | ... | 1860 | 0 ... 1 1 | 1861 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1862 1863 - via QR decomposition of constraints otherwise 1864 */ 1865 if (qr_needed) { 1866 /* space to store Q */ 1867 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1868 /* first we issue queries for optimal work */ 1869 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1870 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1871 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1872 lqr_work = -1; 1873 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1874 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1875 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1876 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1877 lgqr_work = -1; 1878 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1879 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1880 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1881 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1882 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1883 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1884 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1885 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1886 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1887 /* array to store scaling factors for reflectors */ 1888 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1889 /* array to store rhs and solution of triangular solver */ 1890 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1891 /* allocating workspace for check */ 1892 if (pcbddc->dbg_flag) { 1893 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1894 } 1895 } 1896 /* array to store whether a node is primal or not */ 1897 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1898 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1899 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1900 if (i != total_primal_vertices) { 1901 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1902 } 1903 for (i=0;i<total_primal_vertices;i++) { 1904 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1905 } 1906 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1907 1908 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1909 /* -> using implicit ordering contained in temp_indices data */ 1910 total_counts = pcbddc->n_vertices; 1911 primal_counter = total_counts; 1912 while (total_counts<pcbddc->local_primal_size) { 1913 primal_dofs = 1; 1914 if (PetscBTLookup(change_basis,total_counts)) { 1915 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1916 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1917 primal_dofs++; 1918 } 1919 /* get constraint info */ 1920 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1921 dual_dofs = size_of_constraint-primal_dofs; 1922 1923 if (pcbddc->dbg_flag) { 1924 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1925 } 1926 1927 if (primal_dofs > 1) { /* QR */ 1928 1929 /* copy quadrature constraints for change of basis check */ 1930 if (pcbddc->dbg_flag) { 1931 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1932 } 1933 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1934 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1935 1936 /* compute QR decomposition of constraints */ 1937 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1938 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1939 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1940 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1941 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1942 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1943 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1944 1945 /* explictly compute R^-T */ 1946 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1947 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1948 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1949 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1950 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1951 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1952 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1953 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1954 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1955 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1956 1957 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1958 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1959 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1960 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1962 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1963 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1964 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1965 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1966 1967 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1968 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1969 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1970 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1971 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1972 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1973 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1974 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1975 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1976 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1977 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1978 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1979 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1980 1981 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1982 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1983 /* insert cols for primal dofs */ 1984 for (j=0;j<primal_dofs;j++) { 1985 start_vals = &qr_basis[j*size_of_constraint]; 1986 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1987 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1988 } 1989 /* insert cols for dual dofs */ 1990 for (j=0,k=0;j<dual_dofs;k++) { 1991 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1992 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1993 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1994 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1995 j++; 1996 } 1997 } 1998 1999 /* check change of basis */ 2000 if (pcbddc->dbg_flag) { 2001 PetscInt ii,jj; 2002 PetscBool valid_qr=PETSC_TRUE; 2003 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2004 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2006 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2007 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2008 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2009 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2010 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2011 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2012 for (jj=0;jj<size_of_constraint;jj++) { 2013 for (ii=0;ii<primal_dofs;ii++) { 2014 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2015 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2016 } 2017 } 2018 if (!valid_qr) { 2019 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2020 for (jj=0;jj<size_of_constraint;jj++) { 2021 for (ii=0;ii<primal_dofs;ii++) { 2022 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2023 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2024 } 2025 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2026 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2027 } 2028 } 2029 } 2030 } else { 2031 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2032 } 2033 } 2034 } else { /* simple transformation block */ 2035 PetscInt row,col; 2036 PetscScalar val; 2037 for (j=0;j<size_of_constraint;j++) { 2038 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2039 if (!PetscBTLookup(is_primal,row)) { 2040 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2041 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2042 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2043 } else { 2044 for (k=0;k<size_of_constraint;k++) { 2045 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2046 if (row != col) { 2047 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2048 } else { 2049 val = 1.0; 2050 } 2051 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2052 } 2053 } 2054 } 2055 } 2056 /* increment primal counter */ 2057 primal_counter += primal_dofs; 2058 } else { 2059 if (pcbddc->dbg_flag) { 2060 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2061 } 2062 } 2063 /* increment constraint counter total_counts */ 2064 total_counts += primal_dofs; 2065 } 2066 2067 /* free workspace */ 2068 if (qr_needed) { 2069 if (pcbddc->dbg_flag) { 2070 ierr = PetscFree(work);CHKERRQ(ierr); 2071 } 2072 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2073 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2074 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2075 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2076 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2077 } 2078 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2079 /* assembling */ 2080 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 /* 2083 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2084 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2085 */ 2086 } 2087 2088 /* get indices in local ordering for vertices and constraints */ 2089 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2090 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2091 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2092 } 2093 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2094 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2095 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2096 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2097 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2098 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2099 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2100 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2101 /* set quantities in PCBDDC data struct */ 2102 pcbddc->n_actual_vertices = i; 2103 /* check if a new primal space has been introduced */ 2104 pcbddc->new_primal_space_local = PETSC_TRUE; 2105 if (olocal_primal_size == pcbddc->local_primal_size) { 2106 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2107 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2108 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2109 } 2110 /* need to set pcbddc->new_primal_space_local to true when the local primal space is empty, otherwise pcbddc->is_R_local will not be created */ 2111 if (!pcbddc->local_primal_size) pcbddc->new_primal_space_local = PETSC_TRUE; 2112 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2113 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2114 2115 /* flush dbg viewer */ 2116 if (pcbddc->dbg_flag) { 2117 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2118 } 2119 2120 /* free workspace */ 2121 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2122 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2123 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2124 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2125 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2126 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2127 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2128 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 #undef __FUNCT__ 2133 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2134 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2135 { 2136 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2137 PC_IS *pcis = (PC_IS*)pc->data; 2138 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2139 PetscInt ierr,i,vertex_size; 2140 PetscViewer viewer=pcbddc->dbg_viewer; 2141 2142 PetscFunctionBegin; 2143 /* Reset previously computed graph */ 2144 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2145 /* Init local Graph struct */ 2146 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2147 2148 /* Check validity of the csr graph passed in by the user */ 2149 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2150 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2151 } 2152 2153 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2154 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2155 Mat mat_adj; 2156 const PetscInt *xadj,*adjncy; 2157 PetscBool flg_row=PETSC_TRUE; 2158 2159 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2160 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2161 if (!flg_row) { 2162 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2163 } 2164 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2165 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2166 if (!flg_row) { 2167 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2168 } 2169 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2170 } 2171 2172 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2173 vertex_size = 1; 2174 if (pcbddc->user_provided_isfordofs) { 2175 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2176 ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2177 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2178 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2179 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2180 } 2181 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2182 pcbddc->n_ISForDofs = 0; 2183 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2184 } 2185 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2186 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2187 } else { 2188 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2189 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2190 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2191 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2192 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2193 } 2194 } 2195 } 2196 2197 /* Setup of Graph */ 2198 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2199 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2200 } 2201 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2202 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2203 } 2204 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2205 2206 /* Graph's connected components analysis */ 2207 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2208 2209 /* print some info to stdout */ 2210 if (pcbddc->dbg_flag) { 2211 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2212 } 2213 2214 /* mark topography has done */ 2215 pcbddc->recompute_topography = PETSC_FALSE; 2216 PetscFunctionReturn(0); 2217 } 2218 2219 #undef __FUNCT__ 2220 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2221 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2222 { 2223 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2224 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2225 PetscErrorCode ierr; 2226 2227 PetscFunctionBegin; 2228 n = 0; 2229 vertices = 0; 2230 if (pcbddc->ConstraintMatrix) { 2231 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2232 for (i=0;i<local_primal_size;i++) { 2233 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2234 if (size_of_constraint == 1) n++; 2235 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2236 } 2237 if (vertices_idx) { 2238 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2239 n = 0; 2240 for (i=0;i<local_primal_size;i++) { 2241 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2242 if (size_of_constraint == 1) { 2243 vertices[n++]=row_cmat_indices[0]; 2244 } 2245 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2246 } 2247 } 2248 } 2249 *n_vertices = n; 2250 if (vertices_idx) *vertices_idx = vertices; 2251 PetscFunctionReturn(0); 2252 } 2253 2254 #undef __FUNCT__ 2255 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2256 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2257 { 2258 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2259 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2260 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2261 PetscBT touched; 2262 PetscErrorCode ierr; 2263 2264 /* This function assumes that the number of local constraints per connected component 2265 is not greater than the number of nodes defined for the connected component 2266 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2267 PetscFunctionBegin; 2268 n = 0; 2269 constraints_index = 0; 2270 if (pcbddc->ConstraintMatrix) { 2271 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2272 max_size_of_constraint = 0; 2273 for (i=0;i<local_primal_size;i++) { 2274 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2275 if (size_of_constraint > 1) { 2276 n++; 2277 } 2278 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2279 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2280 } 2281 if (constraints_idx) { 2282 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2283 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2284 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2285 n = 0; 2286 for (i=0;i<local_primal_size;i++) { 2287 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2288 if (size_of_constraint > 1) { 2289 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2290 /* find first untouched local node */ 2291 j = 0; 2292 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2293 min_index = row_cmat_global_indices[j]; 2294 min_loc = j; 2295 /* search the minimum among nodes not yet touched on the connected component 2296 since there can be more than one constraint on a single cc */ 2297 for (j=1;j<size_of_constraint;j++) { 2298 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2299 min_index = row_cmat_global_indices[j]; 2300 min_loc = j; 2301 } 2302 } 2303 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2304 constraints_index[n++] = row_cmat_indices[min_loc]; 2305 } 2306 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2307 } 2308 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2309 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2310 } 2311 } 2312 *n_constraints = n; 2313 if (constraints_idx) *constraints_idx = constraints_index; 2314 PetscFunctionReturn(0); 2315 } 2316 2317 /* the next two functions has been adapted from pcis.c */ 2318 #undef __FUNCT__ 2319 #define __FUNCT__ "PCBDDCApplySchur" 2320 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2321 { 2322 PetscErrorCode ierr; 2323 PC_IS *pcis = (PC_IS*)(pc->data); 2324 2325 PetscFunctionBegin; 2326 if (!vec2_B) { vec2_B = v; } 2327 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2328 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2329 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2330 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2331 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2332 PetscFunctionReturn(0); 2333 } 2334 2335 #undef __FUNCT__ 2336 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2337 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2338 { 2339 PetscErrorCode ierr; 2340 PC_IS *pcis = (PC_IS*)(pc->data); 2341 2342 PetscFunctionBegin; 2343 if (!vec2_B) { vec2_B = v; } 2344 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2345 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2346 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2347 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2348 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2349 PetscFunctionReturn(0); 2350 } 2351 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "PCBDDCSubsetNumbering" 2354 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2355 { 2356 Vec local_vec,global_vec; 2357 IS seqis,paris; 2358 VecScatter scatter_ctx; 2359 PetscScalar *array; 2360 PetscInt *temp_global_dofs; 2361 PetscScalar globalsum; 2362 PetscInt i,j,s; 2363 PetscInt nlocals,first_index,old_index,max_local; 2364 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2365 PetscMPIInt *dof_sizes,*dof_displs; 2366 PetscBool first_found; 2367 PetscErrorCode ierr; 2368 2369 PetscFunctionBegin; 2370 /* mpi buffers */ 2371 MPI_Comm_size(comm,&size_prec_comm); 2372 MPI_Comm_rank(comm,&rank_prec_comm); 2373 j = ( !rank_prec_comm ? size_prec_comm : 0); 2374 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2375 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2376 /* get maximum size of subset */ 2377 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2378 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2379 max_local = 0; 2380 if (n_local_dofs) { 2381 max_local = temp_global_dofs[0]; 2382 for (i=1;i<n_local_dofs;i++) { 2383 if (max_local < temp_global_dofs[i] ) { 2384 max_local = temp_global_dofs[i]; 2385 } 2386 } 2387 } 2388 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2389 max_global++; 2390 max_local = 0; 2391 if (n_local_dofs) { 2392 max_local = local_dofs[0]; 2393 for (i=1;i<n_local_dofs;i++) { 2394 if (max_local < local_dofs[i] ) { 2395 max_local = local_dofs[i]; 2396 } 2397 } 2398 } 2399 max_local++; 2400 /* allocate workspace */ 2401 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2402 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2403 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2404 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2405 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2406 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2407 /* create scatter */ 2408 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2409 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2410 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2411 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2412 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2413 /* init array */ 2414 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2415 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2416 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2417 if (local_dofs_mult) { 2418 for (i=0;i<n_local_dofs;i++) { 2419 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2420 } 2421 } else { 2422 for (i=0;i<n_local_dofs;i++) { 2423 array[local_dofs[i]]=1.0; 2424 } 2425 } 2426 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2427 /* scatter into global vec and get total number of global dofs */ 2428 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2429 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2430 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2431 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2432 /* Fill global_vec with cumulative function for global numbering */ 2433 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2434 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2435 nlocals = 0; 2436 first_index = -1; 2437 first_found = PETSC_FALSE; 2438 for (i=0;i<s;i++) { 2439 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2440 first_found = PETSC_TRUE; 2441 first_index = i; 2442 } 2443 nlocals += (PetscInt)PetscRealPart(array[i]); 2444 } 2445 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2446 if (!rank_prec_comm) { 2447 dof_displs[0]=0; 2448 for (i=1;i<size_prec_comm;i++) { 2449 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2450 } 2451 } 2452 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2453 if (first_found) { 2454 array[first_index] += (PetscScalar)nlocals; 2455 old_index = first_index; 2456 for (i=first_index+1;i<s;i++) { 2457 if (PetscRealPart(array[i]) > 0.0) { 2458 array[i] += array[old_index]; 2459 old_index = i; 2460 } 2461 } 2462 } 2463 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2464 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2465 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2466 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2467 /* get global ordering of local dofs */ 2468 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2469 if (local_dofs_mult) { 2470 for (i=0;i<n_local_dofs;i++) { 2471 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2472 } 2473 } else { 2474 for (i=0;i<n_local_dofs;i++) { 2475 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2476 } 2477 } 2478 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2479 /* free workspace */ 2480 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2481 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2482 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2483 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2484 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2485 /* return pointer to global ordering of local dofs */ 2486 *global_numbering_subset = temp_global_dofs; 2487 PetscFunctionReturn(0); 2488 } 2489 2490 #undef __FUNCT__ 2491 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2492 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2493 { 2494 PetscInt i,j; 2495 PetscScalar *alphas; 2496 PetscErrorCode ierr; 2497 2498 PetscFunctionBegin; 2499 /* this implements stabilized Gram-Schmidt */ 2500 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2501 for (i=0;i<n;i++) { 2502 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2503 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2504 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2505 } 2506 ierr = PetscFree(alphas);CHKERRQ(ierr); 2507 PetscFunctionReturn(0); 2508 } 2509 2510 /* TODO 2511 - now preallocation is done assuming SEQDENSE local matrices 2512 */ 2513 #undef __FUNCT__ 2514 #define __FUNCT__ "MatISGetMPIXAIJ" 2515 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2516 { 2517 Mat new_mat; 2518 Mat_IS *matis = (Mat_IS*)(mat->data); 2519 /* info on mat */ 2520 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2521 PetscInt bs,rows,cols; 2522 PetscInt lrows,lcols; 2523 PetscInt local_rows,local_cols; 2524 PetscBool isdense; 2525 /* values insertion */ 2526 PetscScalar *array; 2527 PetscInt *local_indices,*global_indices; 2528 /* work */ 2529 PetscInt i,j,index_row; 2530 PetscErrorCode ierr; 2531 2532 PetscFunctionBegin; 2533 /* MISSING CHECKS 2534 - rectangular case not covered (it is not allowed by MATIS) 2535 */ 2536 /* get info from mat */ 2537 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2538 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2539 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2540 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2541 2542 /* work */ 2543 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2544 for (i=0;i<local_rows;i++) local_indices[i]=i; 2545 /* map indices of local mat to global values */ 2546 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2547 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2548 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2549 2550 if (reuse==MAT_INITIAL_MATRIX) { 2551 Vec vec_dnz,vec_onz; 2552 PetscScalar *my_dnz,*my_onz; 2553 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2554 PetscInt index_col,owner; 2555 PetscMPIInt nsubdomains; 2556 2557 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2558 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2559 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2560 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2561 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2562 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2563 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2564 2565 /* 2566 preallocation 2567 */ 2568 2569 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2570 /* 2571 Some vectors are needed to sum up properly on shared interface dofs. 2572 Preallocation macros cannot do the job. 2573 Note that preallocation is not exact, since it overestimates nonzeros 2574 */ 2575 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2576 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2577 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2578 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2579 /* All processes need to compute entire row ownership */ 2580 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2581 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2582 for (i=0;i<nsubdomains;i++) { 2583 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2584 row_ownership[j]=i; 2585 } 2586 } 2587 2588 /* 2589 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2590 then, they will be summed up properly. This way, preallocation is always sufficient 2591 */ 2592 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2593 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2594 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2595 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2596 for (i=0;i<local_rows;i++) { 2597 index_row = global_indices[i]; 2598 for (j=i;j<local_rows;j++) { 2599 owner = row_ownership[index_row]; 2600 index_col = global_indices[j]; 2601 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2602 my_dnz[i] += 1.0; 2603 } else { /* offdiag block */ 2604 my_onz[i] += 1.0; 2605 } 2606 /* same as before, interchanging rows and cols */ 2607 if (i != j) { 2608 owner = row_ownership[index_col]; 2609 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2610 my_dnz[j] += 1.0; 2611 } else { 2612 my_onz[j] += 1.0; 2613 } 2614 } 2615 } 2616 } 2617 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2618 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2619 if (local_rows) { /* multilevel guard */ 2620 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2621 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2622 } 2623 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2624 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2625 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2626 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2627 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2628 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2629 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2630 2631 /* set computed preallocation in dnz and onz */ 2632 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2633 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2634 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2635 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2636 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2637 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2638 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2639 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2640 2641 /* Resize preallocation if overestimated */ 2642 for (i=0;i<lrows;i++) { 2643 dnz[i] = PetscMin(dnz[i],lcols); 2644 onz[i] = PetscMin(onz[i],cols-lcols); 2645 } 2646 /* set preallocation */ 2647 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2648 for (i=0;i<lrows/bs;i++) { 2649 dnz[i] = dnz[i*bs]/bs; 2650 onz[i] = onz[i*bs]/bs; 2651 } 2652 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2653 for (i=0;i<lrows/bs;i++) { 2654 dnz[i] = dnz[i]-i; 2655 } 2656 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2657 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2658 *M = new_mat; 2659 } else { 2660 PetscInt mbs,mrows,mcols; 2661 /* some checks */ 2662 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2663 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2664 if (mrows != rows) { 2665 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2666 } 2667 if (mrows != rows) { 2668 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2669 } 2670 if (mbs != bs) { 2671 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2672 } 2673 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2674 } 2675 /* set local to global mappings */ 2676 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2677 /* Set values */ 2678 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2679 if (isdense) { /* special case for dense local matrices */ 2680 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2681 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2682 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2683 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2684 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2685 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2686 } else { /* very basic values insertion for all other matrix types */ 2687 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2688 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2689 for (i=0;i<local_rows;i++) { 2690 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2691 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2692 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2693 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2694 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2695 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2696 } 2697 } 2698 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2699 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2700 if (isdense) { 2701 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2702 } 2703 PetscFunctionReturn(0); 2704 } 2705 2706 #undef __FUNCT__ 2707 #define __FUNCT__ "MatISGetSubassemblingPattern" 2708 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2709 { 2710 Mat subdomain_adj; 2711 IS new_ranks,ranks_send_to; 2712 MatPartitioning partitioner; 2713 Mat_IS *matis; 2714 PetscInt n_neighs,*neighs,*n_shared,**shared; 2715 PetscInt prank; 2716 PetscMPIInt size,rank,color; 2717 PetscInt *xadj,*adjncy,*oldranks; 2718 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2719 PetscInt i,j,n_subdomains,local_size,threshold=0; 2720 PetscErrorCode ierr; 2721 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2722 PetscSubcomm subcomm; 2723 2724 PetscFunctionBegin; 2725 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2726 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2727 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2728 2729 /* Get info on mapping */ 2730 matis = (Mat_IS*)(mat->data); 2731 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2732 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2733 2734 /* build local CSR graph of subdomains' connectivity */ 2735 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2736 xadj[0] = 0; 2737 xadj[1] = PetscMax(n_neighs-1,0); 2738 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2739 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2740 2741 if (threshold) { 2742 PetscInt* count,min_threshold; 2743 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2744 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2745 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2746 for (j=0;j<n_shared[i];j++) { 2747 count[shared[i][j]] += 1; 2748 } 2749 } 2750 /* adapt threshold since we dont want to lose significant connections */ 2751 min_threshold = n_neighs; 2752 for (i=1;i<n_neighs;i++) { 2753 for (j=0;j<n_shared[i];j++) { 2754 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2755 } 2756 } 2757 threshold = PetscMax(min_threshold+1,threshold); 2758 xadj[1] = 0; 2759 for (i=1;i<n_neighs;i++) { 2760 for (j=0;j<n_shared[i];j++) { 2761 if (count[shared[i][j]] < threshold) { 2762 adjncy[xadj[1]] = neighs[i]; 2763 adjncy_wgt[xadj[1]] = n_shared[i]; 2764 xadj[1]++; 2765 break; 2766 } 2767 } 2768 } 2769 ierr = PetscFree(count);CHKERRQ(ierr); 2770 } else { 2771 if (xadj[1]) { 2772 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2773 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2774 } 2775 } 2776 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2777 if (use_square) { 2778 for (i=0;i<xadj[1];i++) { 2779 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2780 } 2781 } 2782 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2783 2784 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2785 2786 /* 2787 Restrict work on active processes only. 2788 */ 2789 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2790 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2791 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2792 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2793 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2794 if (color) { 2795 ierr = PetscFree(xadj);CHKERRQ(ierr); 2796 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2797 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2798 } else { 2799 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2800 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2801 prank = rank; 2802 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2803 /* 2804 for (i=0;i<size;i++) { 2805 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2806 } 2807 */ 2808 for (i=0;i<xadj[1];i++) { 2809 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2810 } 2811 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2812 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2813 n_subdomains = (PetscInt)size; 2814 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2815 2816 /* Partition */ 2817 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2818 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2819 if (use_vwgt) { 2820 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2821 v_wgt[0] = local_size; 2822 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2823 } 2824 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); */ 2825 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2826 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2827 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2828 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2829 2830 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2831 /* ranks_send_to_idx[0] = oldranks[is_indices[0]]; */ /* contiguos set of processes */ 2832 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2833 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2834 /* clean up */ 2835 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2836 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2837 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2838 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2839 } 2840 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2841 2842 /* assemble parallel IS for sends */ 2843 i = 1; 2844 if (color) i=0; 2845 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2846 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2847 2848 /* get back IS */ 2849 *is_sends = ranks_send_to; 2850 PetscFunctionReturn(0); 2851 } 2852 2853 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2854 2855 #undef __FUNCT__ 2856 #define __FUNCT__ "MatISSubassemble" 2857 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, MatReuse reuse, Mat *mat_n) 2858 { 2859 Mat local_mat; 2860 Mat_IS *matis; 2861 IS is_sends_internal; 2862 PetscInt rows,cols; 2863 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2864 PetscBool ismatis,isdense; 2865 ISLocalToGlobalMapping l2gmap; 2866 PetscInt* l2gmap_indices; 2867 const PetscInt* is_indices; 2868 MatType new_local_type; 2869 MatTypePrivate new_local_type_private; 2870 /* buffers */ 2871 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2872 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2873 /* MPI */ 2874 MPI_Comm comm; 2875 PetscMPIInt n_sends,n_recvs,commsize; 2876 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2877 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2878 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2879 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2880 PetscErrorCode ierr; 2881 2882 PetscFunctionBegin; 2883 /* checks */ 2884 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2885 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2886 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2887 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2888 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2889 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2890 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2891 if (reuse == MAT_REUSE_MATRIX) { 2892 PetscInt mrows,mcols,mnrows,mncols; 2893 PetscBool ismatis; 2894 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2895 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2896 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2897 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2898 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2899 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2900 } 2901 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2902 PetscValidLogicalCollectiveInt(mat,bs,0); 2903 /* prepare IS for sending if not provided */ 2904 if (!is_sends) { 2905 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2906 ierr = MatISGetSubassemblingPattern(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2907 } else { 2908 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2909 is_sends_internal = is_sends; 2910 } 2911 2912 /* get pointer of MATIS data */ 2913 matis = (Mat_IS*)mat->data; 2914 2915 /* get comm */ 2916 comm = PetscObjectComm((PetscObject)mat); 2917 2918 /* compute number of sends */ 2919 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2920 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2921 2922 /* compute number of receives */ 2923 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2924 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2925 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2926 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2927 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2928 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2929 ierr = PetscFree(iflags);CHKERRQ(ierr); 2930 2931 /* prepare send/receive buffers */ 2932 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2933 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2934 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2935 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2936 2937 /* Get data from local mat */ 2938 if (!isdense) { 2939 /* TODO: See below some guidelines on how to prepare the local buffers */ 2940 /* 2941 send_buffer_vals should contain the raw values of the local matrix 2942 send_buffer_idxs should contain: 2943 - MatType_PRIVATE type 2944 - PetscInt size_of_l2gmap 2945 - PetscInt global_row_indices[size_of_l2gmap] 2946 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2947 */ 2948 } else { 2949 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2950 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2951 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2952 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2953 send_buffer_idxs[1] = i; 2954 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2955 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2956 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2957 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2958 for (i=0;i<n_sends;i++) { 2959 ilengths_vals[is_indices[i]] = len*len; 2960 ilengths_idxs[is_indices[i]] = len+2; 2961 } 2962 } 2963 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2964 buf_size_idxs = 0; 2965 buf_size_vals = 0; 2966 for (i=0;i<n_recvs;i++) { 2967 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2968 buf_size_vals += (PetscInt)olengths_vals[i]; 2969 } 2970 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2971 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2972 2973 /* get new tags for clean communications */ 2974 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2975 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2976 2977 /* allocate for requests */ 2978 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2979 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2980 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2981 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2982 2983 /* communications */ 2984 ptr_idxs = recv_buffer_idxs; 2985 ptr_vals = recv_buffer_vals; 2986 for (i=0;i<n_recvs;i++) { 2987 source_dest = onodes[i]; 2988 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2989 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2990 ptr_idxs += olengths_idxs[i]; 2991 ptr_vals += olengths_vals[i]; 2992 } 2993 for (i=0;i<n_sends;i++) { 2994 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2995 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2996 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2997 } 2998 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2999 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3000 3001 /* assemble new l2g map */ 3002 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3003 ptr_idxs = recv_buffer_idxs; 3004 buf_size_idxs = 0; 3005 for (i=0;i<n_recvs;i++) { 3006 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3007 ptr_idxs += olengths_idxs[i]; 3008 } 3009 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 3010 ptr_idxs = recv_buffer_idxs; 3011 buf_size_idxs = 0; 3012 for (i=0;i<n_recvs;i++) { 3013 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3014 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3015 ptr_idxs += olengths_idxs[i]; 3016 } 3017 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3018 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3019 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3020 3021 /* infer new local matrix type from received local matrices type */ 3022 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3023 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3024 new_local_type_private = MATAIJ_PRIVATE; 3025 new_local_type = MATSEQAIJ; 3026 if (n_recvs) { 3027 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3028 ptr_idxs = recv_buffer_idxs; 3029 for (i=0;i<n_recvs;i++) { 3030 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3031 new_local_type_private = MATAIJ_PRIVATE; 3032 break; 3033 } 3034 ptr_idxs += olengths_idxs[i]; 3035 } 3036 switch (new_local_type_private) { 3037 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 3038 new_local_type = MATSEQAIJ; 3039 bs = 1; 3040 break; 3041 case MATAIJ_PRIVATE: 3042 new_local_type = MATSEQAIJ; 3043 bs = 1; 3044 break; 3045 case MATBAIJ_PRIVATE: 3046 new_local_type = MATSEQBAIJ; 3047 break; 3048 case MATSBAIJ_PRIVATE: 3049 new_local_type = MATSEQSBAIJ; 3050 break; 3051 default: 3052 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3053 break; 3054 } 3055 } 3056 3057 /* create MATIS object if needed */ 3058 if (reuse == MAT_INITIAL_MATRIX) { 3059 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3060 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3061 } else { 3062 /* it also destroys the local matrices */ 3063 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3064 } 3065 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3066 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3067 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3068 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3069 3070 /* set values */ 3071 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3072 ptr_vals = recv_buffer_vals; 3073 ptr_idxs = recv_buffer_idxs; 3074 for (i=0;i<n_recvs;i++) { 3075 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3076 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3077 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3078 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3079 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3080 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3081 } 3082 ptr_idxs += olengths_idxs[i]; 3083 ptr_vals += olengths_vals[i]; 3084 } 3085 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3086 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3087 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3088 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3089 3090 { /* check */ 3091 Vec lvec,rvec; 3092 PetscReal infty_error; 3093 3094 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3095 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3096 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3097 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3098 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3099 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3100 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3101 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3102 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3103 } 3104 3105 /* free workspace */ 3106 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3107 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3108 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3109 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3110 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3111 if (isdense) { 3112 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3113 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3114 } else { 3115 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3116 } 3117 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3118 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3119 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3120 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3121 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3122 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3123 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3124 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3125 ierr = PetscFree(onodes);CHKERRQ(ierr); 3126 PetscFunctionReturn(0); 3127 } 3128 3129 #undef __FUNCT__ 3130 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3131 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3132 { 3133 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3134 PC_IS *pcis = (PC_IS*)pc->data; 3135 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3136 MatNullSpace CoarseNullSpace=NULL; 3137 ISLocalToGlobalMapping coarse_islg; 3138 IS coarse_is; 3139 PetscInt max_it; 3140 PetscInt im_active=-1,active_procs=-1; 3141 PC pc_temp; 3142 PCType coarse_pc_type; 3143 KSPType coarse_ksp_type; 3144 PetscBool multilevel_requested,multilevel_allowed; 3145 PetscBool setsym,issym,isherm,ispreonly,isbddc,isnn,coarse_reuse; 3146 MatStructure matstruct; 3147 PetscErrorCode ierr; 3148 3149 PetscFunctionBegin; 3150 /* Assign global numbering to coarse dofs */ 3151 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3152 PetscInt ocoarse_size; 3153 ocoarse_size = pcbddc->coarse_size; 3154 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3155 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3156 /* see if we can avoid some work */ 3157 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3158 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3159 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3160 coarse_reuse = PETSC_FALSE; 3161 } else { /* we can safely reuse already computed coarse matrix */ 3162 coarse_reuse = PETSC_TRUE; 3163 } 3164 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3165 coarse_reuse = PETSC_FALSE; 3166 } 3167 /* reset any subassembling information */ 3168 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3169 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3170 coarse_reuse = PETSC_TRUE; 3171 } 3172 3173 /* test if we can go multilevel */ 3174 multilevel_allowed = PETSC_FALSE; 3175 multilevel_requested = PETSC_FALSE; 3176 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3177 if (multilevel_requested) { 3178 /* count "active processes" */ 3179 im_active = !!(pcis->n); 3180 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3181 if (active_procs/pcbddc->coarsening_ratio < 2) { 3182 multilevel_allowed = PETSC_FALSE; 3183 } else { 3184 multilevel_allowed = PETSC_TRUE; 3185 } 3186 } 3187 3188 /* set defaults for coarse KSP and PC */ 3189 if (multilevel_allowed) { 3190 coarse_ksp_type = KSPRICHARDSON; 3191 coarse_pc_type = PCBDDC; 3192 } else { 3193 coarse_ksp_type = KSPPREONLY; 3194 coarse_pc_type = PCREDUNDANT; 3195 } 3196 3197 /* create the coarse KSP object only once with defaults */ 3198 if (!pcbddc->coarse_ksp) { 3199 char prefix[256],str_level[3]; 3200 size_t len; 3201 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3202 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3203 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3204 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3205 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3206 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3207 /* prefix */ 3208 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3209 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3210 if (!pcbddc->current_level) { 3211 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3212 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3213 } else { 3214 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3215 if (pcbddc->current_level>1) len -= 2; 3216 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3217 *(prefix+len)='\0'; 3218 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3219 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3220 } 3221 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3222 } 3223 /* allow user customization */ 3224 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3225 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3226 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3227 3228 /* get some info after set from options */ 3229 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3230 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3231 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3232 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3233 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3234 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3235 isbddc = PETSC_FALSE; 3236 } 3237 3238 /* creates temporary l2gmap and IS for coarse indexes */ 3239 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3240 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3241 3242 /* propagate BDDC info to the next level */ 3243 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3244 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3245 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3246 if (isbddc) { /* protects from unneded computations */ 3247 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3248 const PetscInt *idxs; 3249 IS *c_isfordofs,c_isneumann; 3250 ISLocalToGlobalMapping tmap; 3251 3252 /* create map between primal indices (in local representative ordering) and local subdomain numbering */ 3253 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3254 /* allocate space for temporary storage */ 3255 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3256 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3257 /* dofs splitting */ 3258 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&c_isfordofs);CHKERRQ(ierr); 3259 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3260 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3261 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3262 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3263 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3264 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3265 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3266 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&c_isfordofs[i]);CHKERRQ(ierr); 3267 /* ierr = ISView(c_isfordofs[i],0);CHKERRQ(ierr); */ 3268 } 3269 ierr = PCBDDCSetDofsSplitting(pc_temp,pcbddc->n_ISForDofsLocal,c_isfordofs);CHKERRQ(ierr); 3270 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3271 ierr = ISDestroy(&c_isfordofs[i]);CHKERRQ(ierr); 3272 } 3273 ierr = PetscFree(c_isfordofs);CHKERRQ(ierr); 3274 /* neumann boundaries */ 3275 if (pcbddc->NeumannBoundariesLocal) { 3276 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3277 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3278 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3279 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3280 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3281 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3282 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&c_isneumann);CHKERRQ(ierr); 3283 /* ierr = ISView(c_isneumann,0);CHKERRQ(ierr); */ 3284 ierr = PCBDDCSetNeumannBoundaries(pc_temp,c_isneumann);CHKERRQ(ierr); 3285 } 3286 ierr = ISDestroy(&c_isneumann);CHKERRQ(ierr); 3287 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3288 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3289 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3290 } 3291 3292 /* creates temporary MATIS object for coarse matrix */ 3293 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3294 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3295 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3296 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3297 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3298 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3299 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3300 3301 /* assemble coarse matrix */ 3302 if (isbddc || isnn) { 3303 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3304 ierr = MatISGetSubassemblingPattern(coarse_mat_is,pcbddc->coarsening_ratio,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3305 } 3306 if (coarse_reuse) { 3307 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3308 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3309 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3310 } else { 3311 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3312 } 3313 } else { 3314 if (coarse_reuse) { 3315 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3316 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3317 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3318 } else { 3319 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3320 } 3321 } 3322 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3323 3324 /* create local to global scatters for coarse problem */ 3325 if (pcbddc->new_primal_space) { 3326 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3327 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3328 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3329 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3330 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3331 } 3332 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3333 3334 /* propagate symmetry info to coarse matrix */ 3335 issym = PETSC_FALSE; 3336 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3337 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3338 isherm = PETSC_FALSE; 3339 ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr); 3340 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3341 3342 /* Compute coarse null space (special handling by BDDC only) */ 3343 if (pcbddc->NullSpace) { 3344 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3345 if (isbddc) { 3346 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3347 } else { 3348 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3349 } 3350 } 3351 3352 /* set operators */ 3353 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3354 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3355 3356 /* additional KSP customization */ 3357 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3358 if (max_it < 5) { 3359 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3360 } 3361 3362 /* print some info if requested */ 3363 if (pcbddc->dbg_flag) { 3364 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3365 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3366 if (!multilevel_allowed) { 3367 if (multilevel_requested) { 3368 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3369 } else if (pcbddc->max_levels) { 3370 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3371 } 3372 } 3373 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3374 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3375 } 3376 3377 /* setup coarse ksp */ 3378 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3379 3380 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3381 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3382 if (pcbddc->dbg_flag || !ispreonly) { 3383 KSP check_ksp; 3384 KSPType check_ksp_type; 3385 PC check_pc; 3386 Vec check_vec; 3387 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3388 PetscInt its; 3389 PetscBool compute; 3390 3391 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3392 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3393 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3394 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3395 if (ispreonly) { 3396 check_ksp_type = KSPPREONLY; 3397 compute = PETSC_FALSE; 3398 } else { 3399 check_ksp_type = KSPGMRES; 3400 compute = PETSC_TRUE; 3401 } 3402 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3403 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3404 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3405 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3406 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3407 /* create random vec */ 3408 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3409 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3410 if (CoarseNullSpace) { 3411 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3412 } 3413 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3414 /* solve coarse problem */ 3415 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3416 if (CoarseNullSpace) { 3417 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3418 } 3419 /* set eigenvalue estimation if preonly has not been requested */ 3420 if (!ispreonly) { 3421 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3422 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Coarse problem eigenvalues: %1.6e %1.6e\n",lambda_min,lambda_max);CHKERRQ(ierr); */ 3423 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3424 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3425 } 3426 /* check coarse problem residual error */ 3427 if (pcbddc->dbg_flag) { 3428 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3429 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3430 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3431 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3432 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3433 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3434 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3435 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3436 if (!ispreonly) { 3437 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3438 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3439 } 3440 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3441 } 3442 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3443 } 3444 3445 /* print additional info */ 3446 if (pcbddc->dbg_flag) { 3447 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3448 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3449 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3450 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3451 } 3452 3453 /* free memory */ 3454 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3455 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3456 PetscFunctionReturn(0); 3457 } 3458 3459 #undef __FUNCT__ 3460 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3461 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3462 { 3463 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3464 PC_IS* pcis = (PC_IS*)pc->data; 3465 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3466 PetscInt i,coarse_size; 3467 PetscInt *local_primal_indices; 3468 PetscErrorCode ierr; 3469 3470 PetscFunctionBegin; 3471 /* Compute global number of coarse dofs */ 3472 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3473 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3474 } 3475 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3476 3477 /* check numbering */ 3478 if (pcbddc->dbg_flag) { 3479 PetscScalar coarsesum,*array; 3480 3481 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3482 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3483 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3484 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3485 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3486 for (i=0;i<pcbddc->local_primal_size;i++) { 3487 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3488 } 3489 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3490 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3491 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3492 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3493 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3494 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3495 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3496 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3497 for (i=0;i<pcis->n;i++) { 3498 if (array[i] == 1.0) { 3499 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3500 } 3501 } 3502 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3503 for (i=0;i<pcis->n;i++) { 3504 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3505 } 3506 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3507 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3508 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3509 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3510 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3511 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3512 if (pcbddc->dbg_flag > 1) { 3513 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3514 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3515 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3516 for (i=0;i<pcbddc->local_primal_size;i++) { 3517 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3518 } 3519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3520 } 3521 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3522 } 3523 /* get back data */ 3524 *coarse_size_n = coarse_size; 3525 *local_primal_indices_n = local_primal_indices; 3526 PetscFunctionReturn(0); 3527 } 3528 3529 #undef __FUNCT__ 3530 #define __FUNCT__ "PCBDDCGlobalToLocal" 3531 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3532 { 3533 PC_IS* pcis = (PC_IS*)pc->data; 3534 IS localis_t; 3535 PetscInt i,lsize,*idxs; 3536 PetscScalar *vals; 3537 PetscErrorCode ierr; 3538 3539 PetscFunctionBegin; 3540 /* get dirichlet indices in local ordering exploiting local to global map */ 3541 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3542 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3543 for (i=0;i<lsize;i++) vals[i] = 1.0; 3544 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3545 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3546 if (idxs) { /* multilevel guard */ 3547 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3548 } 3549 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3550 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3551 ierr = PetscFree(vals);CHKERRQ(ierr); 3552 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3553 /* now compute dirichlet set in local ordering */ 3554 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3555 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3556 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3557 for (i=0,lsize=0;i<pcis->n;i++) { 3558 if (vals[i] == 1.0) { 3559 lsize++; 3560 } 3561 } 3562 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3563 for (i=0,lsize=0;i<pcis->n;i++) { 3564 if (vals[i] == 1.0) { 3565 idxs[lsize++] = i; 3566 } 3567 } 3568 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3569 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3570 *localis = localis_t; 3571 PetscFunctionReturn(0); 3572 } 3573