1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscInt i; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 54 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 55 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 for (i=0;i<pcbddc->n_ISForDofs;i++) { 59 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 60 } 61 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 62 pcbddc->n_ISForDofs = 0; 63 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 64 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 65 PetscFunctionReturn(0); 66 } 67 68 #undef __FUNCT__ 69 #define __FUNCT__ "PCBDDCResetTopography" 70 PetscErrorCode PCBDDCResetTopography(PC pc) 71 { 72 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 77 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 78 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCResetSolvers" 84 PetscErrorCode PCBDDCResetSolvers(PC pc) 85 { 86 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 87 PetscErrorCode ierr; 88 89 PetscFunctionBegin; 90 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 91 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 111 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 118 { 119 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 120 PC_IS *pcis = (PC_IS*)pc->data; 121 VecType impVecType; 122 PetscInt n_vertices,n_constraints,local_primal_size,n_R,old_size; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 if (!pcbddc->ConstraintMatrix) { 127 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 128 } 129 /* get sizes */ 130 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,NULL);CHKERRQ(ierr); 131 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 132 n_constraints = local_primal_size - n_vertices; 133 n_R = pcis->n-n_vertices; 134 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 135 /* local work vectors (try to avoid unneeded work)*/ 136 /* R nodes */ 137 old_size = -1; 138 if (pcbddc->vec1_R) { 139 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 140 } 141 if (n_R != old_size) { 142 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 144 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 146 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 147 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 148 } 149 /* local primal dofs */ 150 old_size = -1; 151 if (pcbddc->vec1_P) { 152 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 153 } 154 if (local_primal_size != old_size) { 155 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 157 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 158 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 159 } 160 /* local explicit constraints */ 161 old_size = -1; 162 if (pcbddc->vec1_C) { 163 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 164 } 165 if (n_constraints && n_constraints != old_size) { 166 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 168 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 169 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "PCBDDCSetUpCorrection" 176 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 177 { 178 PetscErrorCode ierr; 179 /* pointers to pcis and pcbddc */ 180 PC_IS* pcis = (PC_IS*)pc->data; 181 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 182 /* submatrices of local problem */ 183 Mat A_RV,A_VR,A_VV; 184 /* working matrices */ 185 Mat M1,M2,M3,C_CR; 186 /* working vectors */ 187 Vec vec1_C,vec2_C,vec1_V,vec2_V; 188 /* additional working stuff */ 189 IS is_aux; 190 ISLocalToGlobalMapping BtoNmap; 191 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 192 const PetscScalar *array,*row_cmat_values; 193 const PetscInt *row_cmat_indices,*idx_R_local; 194 PetscInt *vertices,*idx_V_B,*auxindices; 195 PetscInt n_vertices,n_constraints,size_of_constraint; 196 PetscInt i,j,n_R,n_D,n_B; 197 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 198 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 199 MatType impMatType; 200 /* some shortcuts to scalars */ 201 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 202 /* for debugging purposes */ 203 PetscReal *coarsefunctions_errors,*constraints_errors; 204 205 PetscFunctionBegin; 206 /* get number of vertices and their local indices */ 207 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 208 n_constraints = pcbddc->local_primal_size-n_vertices; 209 /* Set Non-overlapping dimensions */ 210 n_B = pcis->n_B; n_D = pcis->n - n_B; 211 n_R = pcis->n-n_vertices; 212 213 /* Set types for local objects needed by BDDC precondtioner */ 214 impMatType = MATSEQDENSE; 215 216 /* Allocating some extra storage just to be safe */ 217 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 218 for (i=0;i<pcis->n;i++) auxindices[i]=i; 219 220 /* vertices in boundary numbering */ 221 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 222 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 223 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 224 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 225 if (i != n_vertices) { 226 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 227 } 228 229 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 230 if (n_constraints) { 231 /* see if we can save some allocations */ 232 if (pcbddc->local_auxmat2) { 233 PetscInt on_R,on_constraints; 234 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 235 if (on_R != n_R || on_constraints != n_constraints) { 236 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 237 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 238 } 239 } 240 /* work vectors */ 241 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 242 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 243 /* auxiliary matrices */ 244 if (!pcbddc->local_auxmat2) { 245 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 246 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 247 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 249 } 250 251 /* Extract constraints on R nodes: C_{CR} */ 252 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 253 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 254 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 255 256 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 257 for (i=0;i<n_constraints;i++) { 258 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 259 /* Get row of constraint matrix in R numbering */ 260 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 262 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 263 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 264 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 265 /* Solve for row of constraint matrix in R numbering */ 266 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 267 /* Set values in local_auxmat2 */ 268 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 270 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 271 } 272 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 275 276 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 277 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 278 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 279 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 280 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 281 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 282 ierr = MatSetUp(M1);CHKERRQ(ierr); 283 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 284 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 285 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 286 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 287 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 288 ierr = MatDestroy(&M2);CHKERRQ(ierr); 289 ierr = MatDestroy(&M3);CHKERRQ(ierr); 290 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 291 if (!pcbddc->local_auxmat1) { 292 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } else { 294 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 295 } 296 } 297 298 /* Get submatrices from subdomain matrix */ 299 if (n_vertices) { 300 PetscInt ibs,mbs; 301 PetscBool issbaij; 302 Mat newmat; 303 304 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 305 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 306 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 307 if (ibs != mbs) { /* need to convert to SEQAIJ */ 308 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 310 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 311 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 313 } else { 314 /* this is safe */ 315 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 316 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 317 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 318 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 319 /* which of the two approaches is faster? */ 320 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 322 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 324 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 325 } else { 326 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 327 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 328 } 329 } 330 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 331 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 332 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 333 } 334 335 /* Matrix of coarse basis functions (local) */ 336 if (pcbddc->coarse_phi_B) { 337 PetscInt on_B,on_primal; 338 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 339 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 340 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 341 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 342 } 343 } 344 if (pcbddc->coarse_phi_D) { 345 PetscInt on_D,on_primal; 346 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 347 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 348 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 349 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 350 } 351 } 352 if (!pcbddc->coarse_phi_B) { 353 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 355 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 356 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 357 } 358 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 359 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 361 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 362 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 363 } 364 365 if (pcbddc->dbg_flag) { 366 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 367 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 368 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 369 } 370 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 371 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 372 373 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 374 375 /* vertices */ 376 for (i=0;i<n_vertices;i++) { 377 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 378 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 379 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 380 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 381 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 382 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 383 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 384 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 387 if (n_constraints) { 388 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 389 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 390 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 391 } 392 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 393 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 394 395 /* Set values in coarse basis function and subdomain part of coarse_mat */ 396 /* coarse basis functions */ 397 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 398 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 399 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 402 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 403 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 404 if (pcbddc->switch_static || pcbddc->dbg_flag) { 405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 } 411 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 412 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 415 if (n_constraints) { 416 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 419 } 420 421 /* check */ 422 if (pcbddc->dbg_flag) { 423 /* assemble subdomain vector on local nodes */ 424 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 425 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 428 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 430 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 431 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 432 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 433 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 if (n_constraints) { 437 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 438 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 } 441 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 442 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 443 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 444 /* check saddle point solution */ 445 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 446 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 447 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 448 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 449 /* shift by the identity matrix */ 450 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 451 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 453 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 454 } 455 } 456 457 /* constraints */ 458 for (i=0;i<n_constraints;i++) { 459 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 460 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 462 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 463 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 464 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 465 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 466 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 467 if (n_vertices) { 468 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 469 } 470 /* Set values in coarse basis function and subdomain part of coarse_mat */ 471 /* coarse basis functions */ 472 j = i+n_vertices; /* don't touch this! */ 473 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 474 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 475 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 477 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 478 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 481 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 482 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 483 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 484 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 485 } 486 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 487 if (n_vertices) { 488 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 491 } 492 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 493 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 494 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 495 496 if (pcbddc->dbg_flag) { 497 /* assemble subdomain vector on nodes */ 498 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 499 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 500 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 501 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 502 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 503 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 504 /* assemble subdomain vector of lagrange multipliers */ 505 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 506 if (n_vertices) { 507 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 508 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 510 } 511 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 512 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 514 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 515 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 516 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 517 /* check saddle point solution */ 518 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 519 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 520 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 521 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 522 /* shift by the identity matrix */ 523 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 524 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 525 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 526 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 527 } 528 } 529 /* call assembling routines for local coarse basis */ 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 if (pcbddc->switch_static || pcbddc->dbg_flag) { 533 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 535 } 536 537 /* compute other basis functions for non-symmetric problems */ 538 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 539 if (!setsym || (setsym && !issym)) { 540 if (!pcbddc->coarse_psi_B) { 541 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 542 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 543 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 544 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 545 } 546 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 547 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 548 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 549 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 550 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 551 } 552 for (i=0;i<pcbddc->local_primal_size;i++) { 553 if (n_constraints) { 554 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 555 for (j=0;j<n_constraints;j++) { 556 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 557 } 558 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 559 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 560 } 561 if (i<n_vertices) { 562 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 563 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 564 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 565 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 566 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 567 if (n_constraints) { 568 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 569 } 570 } else { 571 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 572 } 573 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 574 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 575 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 576 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 577 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 578 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 579 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 580 if (i<n_vertices) { 581 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 582 } 583 if (pcbddc->switch_static || pcbddc->dbg_flag) { 584 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 585 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 586 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 587 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 588 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 589 } 590 591 if (pcbddc->dbg_flag) { 592 /* assemble subdomain vector on nodes */ 593 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 594 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 595 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 596 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 597 if (i<n_vertices) { 598 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 599 } 600 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 601 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 602 /* assemble subdomain vector of lagrange multipliers */ 603 for (j=0;j<pcbddc->local_primal_size;j++) { 604 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 605 } 606 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 607 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 608 /* check saddle point solution */ 609 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 610 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 611 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 612 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 613 /* shift by the identity matrix */ 614 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 615 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 616 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 617 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 618 } 619 } 620 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 if (pcbddc->switch_static || pcbddc->dbg_flag) { 623 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 624 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 625 } 626 } 627 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 628 /* Checking coarse_sub_mat and coarse basis functios */ 629 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 630 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 631 if (pcbddc->dbg_flag) { 632 Mat coarse_sub_mat; 633 Mat AUXMAT,TM1,TM2,TM3,TM4; 634 Mat coarse_phi_D,coarse_phi_B; 635 Mat coarse_psi_D,coarse_psi_B; 636 Mat A_II,A_BB,A_IB,A_BI; 637 MatType checkmattype=MATSEQAIJ; 638 PetscReal real_value; 639 640 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 641 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 642 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 643 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 644 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 645 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 646 if (pcbddc->coarse_psi_B) { 647 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 648 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 649 } 650 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 651 652 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 653 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 654 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 655 if (pcbddc->coarse_psi_B) { 656 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 657 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 658 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 659 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 660 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 661 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 662 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 663 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 664 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 665 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 666 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 667 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 668 } else { 669 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 670 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 671 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 675 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 676 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 677 } 678 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 679 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 680 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 681 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 682 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 683 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 684 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 685 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 686 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 689 for (i=0;i<pcbddc->local_primal_size;i++) { 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 691 } 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 693 for (i=0;i<pcbddc->local_primal_size;i++) { 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 695 } 696 if (pcbddc->coarse_psi_B) { 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 698 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 700 } 701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 702 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 704 } 705 } 706 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 707 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 708 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 709 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 710 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 711 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 712 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 713 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 714 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 715 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 716 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 717 if (pcbddc->coarse_psi_B) { 718 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 719 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 720 } 721 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 722 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 723 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 724 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 725 } 726 /* free memory */ 727 if (n_vertices) { 728 ierr = PetscFree(vertices);CHKERRQ(ierr); 729 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 730 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 731 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 732 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 733 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 734 } 735 if (n_constraints) { 736 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 737 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 738 ierr = MatDestroy(&M1);CHKERRQ(ierr); 739 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 740 } 741 ierr = PetscFree(auxindices);CHKERRQ(ierr); 742 /* get back data */ 743 *coarse_submat_vals_n = coarse_submat_vals; 744 PetscFunctionReturn(0); 745 } 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 749 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 750 { 751 PC_IS* pcis = (PC_IS*)(pc->data); 752 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 753 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 754 PetscBool issbaij,isbaij; 755 /* manage repeated solves */ 756 MatReuse reuse; 757 MatStructure matstruct; 758 PetscErrorCode ierr; 759 760 PetscFunctionBegin; 761 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 762 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 763 } 764 /* get mat flags */ 765 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 766 reuse = MAT_INITIAL_MATRIX; 767 if (pc->setupcalled) { 768 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 769 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 770 if (matstruct == SAME_NONZERO_PATTERN) { 771 reuse = MAT_REUSE_MATRIX; 772 } else { 773 reuse = MAT_INITIAL_MATRIX; 774 } 775 } 776 if (reuse == MAT_INITIAL_MATRIX) { 777 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 778 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 780 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 782 } 783 784 /* transform local matrices if needed */ 785 if (pcbddc->use_change_of_basis) { 786 Mat change_mat_all; 787 PetscScalar *row_cmat_values; 788 PetscInt *row_cmat_indices; 789 PetscInt *nnz,*is_indices,*temp_indices; 790 PetscInt i,j,k,n_D,n_B; 791 792 /* Get Non-overlapping dimensions */ 793 n_B = pcis->n_B; 794 n_D = pcis->n-n_B; 795 796 /* compute nonzero structure of change of basis on all local nodes */ 797 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 798 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 799 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 800 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 801 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 k=1; 803 for (i=0;i<n_B;i++) { 804 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 805 nnz[is_indices[i]]=j; 806 if (k < j) k = j; 807 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 808 } 809 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 810 /* assemble change of basis matrix on the whole set of local dofs */ 811 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 812 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 813 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 814 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 815 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 816 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 817 for (i=0;i<n_D;i++) { 818 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 819 } 820 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 821 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 822 for (i=0;i<n_B;i++) { 823 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 824 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 825 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 826 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 827 } 828 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 829 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 831 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 832 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 833 if (!issbaij && !isbaij) { 834 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 835 } else { 836 Mat work_mat; 837 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 838 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 839 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 840 } 841 /* 842 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 843 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 844 */ 845 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 846 ierr = PetscFree(nnz);CHKERRQ(ierr); 847 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 848 } else { 849 /* without change of basis, the local matrix is unchanged */ 850 if (!pcbddc->local_mat) { 851 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 852 pcbddc->local_mat = matis->A; 853 } 854 } 855 856 /* get submatrices */ 857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 858 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 859 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 860 if (!issbaij) { 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 862 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 863 } else { 864 Mat newmat; 865 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 866 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 867 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 868 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 869 } 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 875 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 876 { 877 PC_IS* pcis = (PC_IS*)(pc->data); 878 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 879 IS is_aux1,is_aux2; 880 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 881 PetscInt n_vertices,i,j,n_R,n_D,n_B; 882 PetscInt vbs,bs; 883 PetscBT bitmask; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 /* No need to setup local scatters if primal space is unchanged */ 888 if (!pcbddc->new_primal_space_local) { 889 PetscFunctionReturn(0); 890 } 891 /* destroy old objects */ 892 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 893 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 894 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 895 /* Set Non-overlapping dimensions */ 896 n_B = pcis->n_B; n_D = pcis->n - n_B; 897 /* get vertex indices from constraint matrix */ 898 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 899 900 /* create auxiliary bitmask */ 901 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 902 for (i=0;i<n_vertices;i++) { 903 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 904 } 905 906 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 907 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 908 for (i=0, n_R=0; i<pcis->n; i++) { 909 if (!PetscBTLookup(bitmask,i)) { 910 idx_R_local[n_R] = i; 911 n_R++; 912 } 913 } 914 915 /* Block code */ 916 vbs = 1; 917 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 918 if (bs>1 && !(n_vertices%bs)) { 919 PetscBool is_blocked = PETSC_TRUE; 920 PetscInt *vary; 921 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 922 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 923 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 924 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 925 for (i=0; i<n_vertices; i++) { 926 if (vary[i]!=0 && vary[i]!=bs) { 927 is_blocked = PETSC_FALSE; 928 break; 929 } 930 } 931 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 932 vbs = bs; 933 for (i=0;i<n_R/vbs;i++) { 934 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 935 } 936 } 937 ierr = PetscFree(vary);CHKERRQ(ierr); 938 } 939 ierr = PetscFree(vertices);CHKERRQ(ierr); 940 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 941 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 942 943 /* print some info if requested */ 944 if (pcbddc->dbg_flag) { 945 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 946 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 947 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 948 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 949 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 950 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 951 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 952 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 953 } 954 955 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 956 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 957 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 958 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 959 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 960 for (i=0; i<n_D; i++) { 961 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 962 } 963 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 964 for (i=0, j=0; i<n_R; i++) { 965 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 966 aux_array1[j++] = i; 967 } 968 } 969 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 970 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 971 for (i=0, j=0; i<n_B; i++) { 972 if (!PetscBTLookup(bitmask,is_indices[i])) { 973 aux_array2[j++] = i; 974 } 975 } 976 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 977 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 978 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 979 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 980 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 981 982 if (pcbddc->switch_static || pcbddc->dbg_flag) { 983 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 984 for (i=0, j=0; i<n_R; i++) { 985 if (PetscBTLookup(bitmask,idx_R_local[i])) { 986 aux_array1[j++] = i; 987 } 988 } 989 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 990 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 991 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 992 } 993 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 994 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 995 PetscFunctionReturn(0); 996 } 997 998 999 #undef __FUNCT__ 1000 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1001 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1002 { 1003 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1004 PC_IS *pcis = (PC_IS*)pc->data; 1005 PC pc_temp; 1006 Mat A_RR; 1007 MatStructure matstruct; 1008 MatReuse reuse; 1009 PetscScalar m_one = -1.0; 1010 PetscReal value; 1011 PetscInt n_D,n_R,ibs,mbs; 1012 PetscBool use_exact,use_exact_reduced,issbaij; 1013 PetscErrorCode ierr; 1014 /* prefixes stuff */ 1015 char dir_prefix[256],neu_prefix[256],str_level[3]; 1016 size_t len; 1017 1018 PetscFunctionBegin; 1019 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1020 1021 /* compute prefixes */ 1022 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1023 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1024 if (!pcbddc->current_level) { 1025 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1026 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1027 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1028 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1029 } else { 1030 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1031 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1032 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1033 len -= 15; /* remove "pc_bddc_coarse_" */ 1034 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1035 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1036 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1037 *(dir_prefix+len)='\0'; 1038 *(neu_prefix+len)='\0'; 1039 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1040 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1041 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1042 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1043 } 1044 1045 /* DIRICHLET PROBLEM */ 1046 /* Matrix for Dirichlet problem is pcis->A_II */ 1047 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1048 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1049 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1050 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1051 /* default */ 1052 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1053 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1054 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1055 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1056 if (issbaij) { 1057 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1058 } else { 1059 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1060 } 1061 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1062 } 1063 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1064 /* Allow user's customization */ 1065 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1066 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1067 if (!n_D) { 1068 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1069 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1070 } 1071 /* Set Up KSP for Dirichlet problem of BDDC */ 1072 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1073 /* set ksp_D into pcis data */ 1074 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1075 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1076 pcis->ksp_D = pcbddc->ksp_D; 1077 1078 /* NEUMANN PROBLEM */ 1079 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1080 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1081 if (pcbddc->ksp_R) { /* already created ksp */ 1082 PetscInt nn_R; 1083 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1084 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1085 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1086 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1087 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1088 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1089 reuse = MAT_INITIAL_MATRIX; 1090 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1091 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1092 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1093 reuse = MAT_INITIAL_MATRIX; 1094 } else { /* safe to reuse the matrix */ 1095 reuse = MAT_REUSE_MATRIX; 1096 } 1097 } 1098 /* last check */ 1099 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1100 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1101 reuse = MAT_INITIAL_MATRIX; 1102 } 1103 } else { /* first time, so we need to create the matrix */ 1104 reuse = MAT_INITIAL_MATRIX; 1105 } 1106 /* extract A_RR */ 1107 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1108 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1109 if (ibs != mbs) { 1110 Mat newmat; 1111 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1112 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1113 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1114 } else { 1115 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1116 } 1117 if (!pcbddc->ksp_R) { /* create object if not present */ 1118 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1119 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1120 /* default */ 1121 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1122 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1123 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1124 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1125 if (issbaij) { 1126 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1127 } else { 1128 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1129 } 1130 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1131 } 1132 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1133 /* Allow user's customization */ 1134 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1135 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1136 if (!n_R) { 1137 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1138 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1139 } 1140 /* Set Up KSP for Neumann problem of BDDC */ 1141 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1142 1143 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1144 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1145 /* Dirichlet */ 1146 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1147 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1148 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1149 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1150 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1151 /* need to be adapted? */ 1152 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1153 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1154 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1155 /* print info */ 1156 if (pcbddc->dbg_flag) { 1157 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1158 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1161 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1162 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1163 } 1164 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1165 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1166 } 1167 1168 /* Neumann */ 1169 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1170 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1171 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1172 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1173 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1174 /* need to be adapted? */ 1175 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1176 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1177 /* print info */ 1178 if (pcbddc->dbg_flag) { 1179 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1180 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1181 } 1182 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1183 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1184 } 1185 } 1186 /* free Neumann problem's matrix */ 1187 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1193 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1194 { 1195 PetscErrorCode ierr; 1196 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1197 1198 PetscFunctionBegin; 1199 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1200 if (pcbddc->local_auxmat1) { 1201 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1202 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1203 } 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1209 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1210 { 1211 PetscErrorCode ierr; 1212 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1213 PC_IS* pcis = (PC_IS*) (pc->data); 1214 const PetscScalar zero = 0.0; 1215 1216 PetscFunctionBegin; 1217 /* Application of PHI^T (or PSI^T) */ 1218 if (pcbddc->coarse_psi_B) { 1219 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1220 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1221 } else { 1222 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1223 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1224 } 1225 /* Scatter data of coarse_rhs */ 1226 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1227 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 1229 /* Local solution on R nodes */ 1230 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1231 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1232 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1233 if (pcbddc->switch_static) { 1234 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1235 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1236 } 1237 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1238 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1239 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1241 if (pcbddc->switch_static) { 1242 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1243 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 } 1245 1246 /* Coarse solution */ 1247 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1248 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1249 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1250 } 1251 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1252 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1253 1254 /* Sum contributions from two levels */ 1255 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1256 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1262 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1263 { 1264 PetscErrorCode ierr; 1265 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1266 1267 PetscFunctionBegin; 1268 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 #undef __FUNCT__ 1273 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1274 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1275 { 1276 PetscErrorCode ierr; 1277 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1278 1279 PetscFunctionBegin; 1280 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1281 PetscFunctionReturn(0); 1282 } 1283 1284 /* uncomment for testing purposes */ 1285 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1288 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1289 { 1290 PetscErrorCode ierr; 1291 PC_IS* pcis = (PC_IS*)(pc->data); 1292 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1293 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1294 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1295 MatType impMatType=MATSEQAIJ; 1296 /* one and zero */ 1297 PetscScalar one=1.0,zero=0.0; 1298 /* space to store constraints and their local indices */ 1299 PetscScalar *temp_quadrature_constraint; 1300 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1301 /* iterators */ 1302 PetscInt i,j,k,total_counts,temp_start_ptr; 1303 /* stuff to store connected components stored in pcbddc->mat_graph */ 1304 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1305 PetscInt n_ISForFaces,n_ISForEdges; 1306 /* near null space stuff */ 1307 MatNullSpace nearnullsp; 1308 const Vec *nearnullvecs; 1309 Vec *localnearnullsp; 1310 PetscBool nnsp_has_cnst; 1311 PetscInt nnsp_size; 1312 PetscScalar *array; 1313 /* BLAS integers */ 1314 PetscBLASInt lwork,lierr; 1315 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1316 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1317 /* LAPACK working arrays for SVD or POD */ 1318 PetscBool skip_lapack; 1319 PetscScalar *work; 1320 PetscReal *singular_vals; 1321 #if defined(PETSC_USE_COMPLEX) 1322 PetscReal *rwork; 1323 #endif 1324 #if defined(PETSC_MISSING_LAPACK_GESVD) 1325 PetscBLASInt Blas_one_2=1; 1326 PetscScalar *temp_basis,*correlation_mat; 1327 #else 1328 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1329 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1330 #endif 1331 /* reuse */ 1332 PetscInt olocal_primal_size; 1333 PetscInt *oprimal_indices_local_idxs; 1334 /* change of basis */ 1335 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1336 PetscBool boolforchange,qr_needed; 1337 PetscBT touched,change_basis,qr_needed_idx; 1338 /* auxiliary stuff */ 1339 PetscInt *nnz,*is_indices,*local_to_B; 1340 /* some quantities */ 1341 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1342 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1343 1344 1345 PetscFunctionBegin; 1346 /* Destroy Mat objects computed previously */ 1347 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1348 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1349 /* Get index sets for faces, edges and vertices from graph */ 1350 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1351 pcbddc->use_vertices = PETSC_TRUE; 1352 } 1353 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1354 /* HACK: provide functions to set change of basis */ 1355 if (!ISForVertices && pcbddc->NullSpace) { 1356 pcbddc->use_change_of_basis = PETSC_TRUE; 1357 pcbddc->use_change_on_faces = PETSC_FALSE; 1358 } 1359 /* print some info */ 1360 if (pcbddc->dbg_flag) { 1361 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1362 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1363 i = 0; 1364 if (ISForVertices) { 1365 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1366 } 1367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1368 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1369 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1370 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1371 } 1372 /* check if near null space is attached to global mat */ 1373 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1374 if (nearnullsp) { 1375 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1376 /* remove any stored info */ 1377 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1378 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1379 /* store information for BDDC solver reuse */ 1380 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1381 pcbddc->onearnullspace = nearnullsp; 1382 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1383 for (i=0;i<nnsp_size;i++) { 1384 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1385 } 1386 } else { /* if near null space is not provided BDDC uses constants by default */ 1387 nnsp_size = 0; 1388 nnsp_has_cnst = PETSC_TRUE; 1389 } 1390 /* get max number of constraints on a single cc */ 1391 max_constraints = nnsp_size; 1392 if (nnsp_has_cnst) max_constraints++; 1393 1394 /* 1395 Evaluate maximum storage size needed by the procedure 1396 - temp_indices will contain start index of each constraint stored as follows 1397 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1398 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1399 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1400 */ 1401 total_counts = n_ISForFaces+n_ISForEdges; 1402 total_counts *= max_constraints; 1403 n_vertices = 0; 1404 if (ISForVertices) { 1405 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1406 } 1407 total_counts += n_vertices; 1408 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1409 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1410 total_counts = 0; 1411 max_size_of_constraint = 0; 1412 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1413 if (i<n_ISForEdges) { 1414 used_IS = &ISForEdges[i]; 1415 } else { 1416 used_IS = &ISForFaces[i-n_ISForEdges]; 1417 } 1418 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1419 total_counts += j; 1420 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1421 } 1422 total_counts *= max_constraints; 1423 total_counts += n_vertices; 1424 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1425 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1426 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1427 /* local to boundary numbering */ 1428 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1429 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1430 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1431 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1432 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1433 /* get local part of global near null space vectors */ 1434 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1435 for (k=0;k<nnsp_size;k++) { 1436 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1437 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1438 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1439 } 1440 1441 /* whether or not to skip lapack calls */ 1442 skip_lapack = PETSC_TRUE; 1443 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1444 1445 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1446 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1447 PetscScalar temp_work; 1448 #if defined(PETSC_MISSING_LAPACK_GESVD) 1449 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1450 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1451 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1452 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1453 #if defined(PETSC_USE_COMPLEX) 1454 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1455 #endif 1456 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1457 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1458 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1459 lwork = -1; 1460 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1461 #if !defined(PETSC_USE_COMPLEX) 1462 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1463 #else 1464 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1465 #endif 1466 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1467 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1468 #else /* on missing GESVD */ 1469 /* SVD */ 1470 PetscInt max_n,min_n; 1471 max_n = max_size_of_constraint; 1472 min_n = max_constraints; 1473 if (max_size_of_constraint < max_constraints) { 1474 min_n = max_size_of_constraint; 1475 max_n = max_constraints; 1476 } 1477 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1478 #if defined(PETSC_USE_COMPLEX) 1479 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1480 #endif 1481 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1482 lwork = -1; 1483 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1484 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1485 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1486 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1487 #if !defined(PETSC_USE_COMPLEX) 1488 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1489 #else 1490 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1491 #endif 1492 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1493 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1494 #endif /* on missing GESVD */ 1495 /* Allocate optimal workspace */ 1496 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1497 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1498 } 1499 /* Now we can loop on constraining sets */ 1500 total_counts = 0; 1501 temp_indices[0] = 0; 1502 /* vertices */ 1503 if (ISForVertices) { 1504 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1505 if (nnsp_has_cnst) { /* consider all vertices */ 1506 for (i=0;i<n_vertices;i++) { 1507 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1508 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1509 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1510 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1511 total_counts++; 1512 } 1513 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1514 PetscBool used_vertex; 1515 for (i=0;i<n_vertices;i++) { 1516 used_vertex = PETSC_FALSE; 1517 k = 0; 1518 while (!used_vertex && k<nnsp_size) { 1519 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1520 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1521 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1522 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1523 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1524 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1525 total_counts++; 1526 used_vertex = PETSC_TRUE; 1527 } 1528 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1529 k++; 1530 } 1531 } 1532 } 1533 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1534 n_vertices = total_counts; 1535 } 1536 1537 /* edges and faces */ 1538 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1539 if (i<n_ISForEdges) { 1540 used_IS = &ISForEdges[i]; 1541 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1542 } else { 1543 used_IS = &ISForFaces[i-n_ISForEdges]; 1544 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1545 } 1546 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1547 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1548 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1549 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1550 /* change of basis should not be performed on local periodic nodes */ 1551 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1552 if (nnsp_has_cnst) { 1553 PetscScalar quad_value; 1554 temp_constraints++; 1555 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1556 for (j=0;j<size_of_constraint;j++) { 1557 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1558 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1559 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1560 } 1561 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1562 total_counts++; 1563 } 1564 for (k=0;k<nnsp_size;k++) { 1565 PetscReal real_value; 1566 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1567 for (j=0;j<size_of_constraint;j++) { 1568 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1569 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1570 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1571 } 1572 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1573 /* check if array is null on the connected component */ 1574 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1575 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1576 if (real_value > 0.0) { /* keep indices and values */ 1577 temp_constraints++; 1578 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1579 total_counts++; 1580 } 1581 } 1582 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1583 valid_constraints = temp_constraints; 1584 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1585 if (!pcbddc->use_nnsp_true && temp_constraints) { 1586 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1587 1588 #if defined(PETSC_MISSING_LAPACK_GESVD) 1589 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1590 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1591 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1592 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1593 from that computed using LAPACKgesvd 1594 -> This is due to a different computation of eigenvectors in LAPACKheev 1595 -> The quality of the POD-computed basis will be the same */ 1596 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1597 /* Store upper triangular part of correlation matrix */ 1598 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1599 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1600 for (j=0;j<temp_constraints;j++) { 1601 for (k=0;k<j+1;k++) { 1602 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1603 } 1604 } 1605 /* compute eigenvalues and eigenvectors of correlation matrix */ 1606 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1607 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1608 #if !defined(PETSC_USE_COMPLEX) 1609 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1610 #else 1611 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1612 #endif 1613 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1614 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1615 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1616 j=0; 1617 while (j < temp_constraints && singular_vals[j] < tol) j++; 1618 total_counts=total_counts-j; 1619 valid_constraints = temp_constraints-j; 1620 /* scale and copy POD basis into used quadrature memory */ 1621 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1622 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1623 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1624 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1625 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1626 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1627 if (j<temp_constraints) { 1628 PetscInt ii; 1629 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1630 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1631 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1632 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1633 for (k=0;k<temp_constraints-j;k++) { 1634 for (ii=0;ii<size_of_constraint;ii++) { 1635 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1636 } 1637 } 1638 } 1639 #else /* on missing GESVD */ 1640 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1641 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1642 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1643 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1644 #if !defined(PETSC_USE_COMPLEX) 1645 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1646 #else 1647 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1648 #endif 1649 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1650 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1651 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1652 k = temp_constraints; 1653 if (k > size_of_constraint) k = size_of_constraint; 1654 j = 0; 1655 while (j < k && singular_vals[k-j-1] < tol) j++; 1656 total_counts = total_counts-temp_constraints+k-j; 1657 valid_constraints = k-j; 1658 #endif /* on missing GESVD */ 1659 } 1660 /* setting change_of_basis flag is safe now */ 1661 if (boolforchange) { 1662 for (j=0;j<valid_constraints;j++) { 1663 PetscBTSet(change_basis,total_counts-j-1); 1664 } 1665 } 1666 } 1667 /* free index sets of faces, edges and vertices */ 1668 for (i=0;i<n_ISForFaces;i++) { 1669 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1670 } 1671 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1672 for (i=0;i<n_ISForEdges;i++) { 1673 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1674 } 1675 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1676 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1677 1678 /* free workspace */ 1679 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1680 ierr = PetscFree(work);CHKERRQ(ierr); 1681 #if defined(PETSC_USE_COMPLEX) 1682 ierr = PetscFree(rwork);CHKERRQ(ierr); 1683 #endif 1684 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1685 #if defined(PETSC_MISSING_LAPACK_GESVD) 1686 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1687 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1688 #endif 1689 } 1690 for (k=0;k<nnsp_size;k++) { 1691 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1692 } 1693 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1694 1695 /* set quantities in pcbddc data structure and store previous primal size */ 1696 /* n_vertices defines the number of subdomain corners in the primal space */ 1697 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1698 olocal_primal_size = pcbddc->local_primal_size; 1699 pcbddc->local_primal_size = total_counts; 1700 pcbddc->n_vertices = n_vertices; 1701 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1702 1703 /* Create constraint matrix */ 1704 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1705 /* so we need to set it up properly either with or without change of basis */ 1706 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1707 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1708 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1709 /* array to compute a local numbering of constraints : vertices first then constraints */ 1710 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1711 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1712 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1713 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1714 /* auxiliary stuff for basis change */ 1715 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1716 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1717 1718 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1719 total_primal_vertices=0; 1720 for (i=0;i<pcbddc->local_primal_size;i++) { 1721 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1722 if (size_of_constraint == 1) { 1723 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1724 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1725 aux_primal_minloc[total_primal_vertices]=0; 1726 total_primal_vertices++; 1727 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1728 PetscInt min_loc,min_index; 1729 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1730 /* find first untouched local node */ 1731 k = 0; 1732 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1733 min_index = global_indices[k]; 1734 min_loc = k; 1735 /* search the minimum among global nodes already untouched on the cc */ 1736 for (k=1;k<size_of_constraint;k++) { 1737 /* there can be more than one constraint on a single connected component */ 1738 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1739 min_index = global_indices[k]; 1740 min_loc = k; 1741 } 1742 } 1743 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1744 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1745 aux_primal_minloc[total_primal_vertices]=min_loc; 1746 total_primal_vertices++; 1747 } 1748 } 1749 /* determine if a QR strategy is needed for change of basis */ 1750 qr_needed = PETSC_FALSE; 1751 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1752 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1753 if (PetscBTLookup(change_basis,i)) { 1754 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1755 j = 0; 1756 for (k=0;k<size_of_constraint;k++) { 1757 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1758 j++; 1759 } 1760 } 1761 /* found more than one primal dof on the cc */ 1762 if (j > 1) { 1763 PetscBTSet(qr_needed_idx,i); 1764 qr_needed = PETSC_TRUE; 1765 } 1766 } 1767 } 1768 /* free workspace */ 1769 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1770 1771 /* permute indices in order to have a sorted set of vertices */ 1772 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1773 1774 /* nonzero structure of constraint matrix */ 1775 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1776 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1777 j=total_primal_vertices; 1778 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1779 if (!PetscBTLookup(change_basis,i)) { 1780 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1781 j++; 1782 } 1783 } 1784 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1785 ierr = PetscFree(nnz);CHKERRQ(ierr); 1786 /* set values in constraint matrix */ 1787 for (i=0;i<total_primal_vertices;i++) { 1788 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1789 } 1790 total_counts = total_primal_vertices; 1791 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1792 if (!PetscBTLookup(change_basis,i)) { 1793 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1794 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1795 total_counts++; 1796 } 1797 } 1798 /* assembling */ 1799 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1800 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1801 /* 1802 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1803 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1804 */ 1805 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1806 if (pcbddc->use_change_of_basis) { 1807 /* dual and primal dofs on a single cc */ 1808 PetscInt dual_dofs,primal_dofs; 1809 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1810 PetscInt primal_counter; 1811 /* working stuff for GEQRF */ 1812 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1813 PetscBLASInt lqr_work; 1814 /* working stuff for UNGQR */ 1815 PetscScalar *gqr_work,lgqr_work_t; 1816 PetscBLASInt lgqr_work; 1817 /* working stuff for TRTRS */ 1818 PetscScalar *trs_rhs; 1819 PetscBLASInt Blas_NRHS; 1820 /* pointers for values insertion into change of basis matrix */ 1821 PetscInt *start_rows,*start_cols; 1822 PetscScalar *start_vals; 1823 /* working stuff for values insertion */ 1824 PetscBT is_primal; 1825 1826 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1827 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1828 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1829 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1830 /* work arrays */ 1831 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1832 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1833 /* nonzeros per row */ 1834 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1835 if (PetscBTLookup(change_basis,i)) { 1836 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1837 if (PetscBTLookup(qr_needed_idx,i)) { 1838 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1839 } else { 1840 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1841 /* get local primal index on the cc */ 1842 j = 0; 1843 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1844 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1845 } 1846 } 1847 } 1848 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1849 ierr = PetscFree(nnz);CHKERRQ(ierr); 1850 /* Set initial identity in the matrix */ 1851 for (i=0;i<pcis->n_B;i++) { 1852 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1853 } 1854 1855 if (pcbddc->dbg_flag) { 1856 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1857 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1858 } 1859 1860 1861 /* Now we loop on the constraints which need a change of basis */ 1862 /* 1863 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1864 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1865 1866 Basic blocks of change of basis matrix T computed 1867 1868 - Using the following block transformation if there is only a primal dof on the cc 1869 (in the example, primal dof is the last one of the edge in LOCAL ordering 1870 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1871 | 1 0 ... 0 1 | 1872 | 0 1 ... 0 1 | 1873 | ... | 1874 | 0 ... 1 1 | 1875 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1876 1877 - via QR decomposition of constraints otherwise 1878 */ 1879 if (qr_needed) { 1880 /* space to store Q */ 1881 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1882 /* first we issue queries for optimal work */ 1883 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1884 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1885 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1886 lqr_work = -1; 1887 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1888 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1889 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1890 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1891 lgqr_work = -1; 1892 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1893 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1894 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1895 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1896 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1897 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1898 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1899 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1900 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1901 /* array to store scaling factors for reflectors */ 1902 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1903 /* array to store rhs and solution of triangular solver */ 1904 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1905 /* allocating workspace for check */ 1906 if (pcbddc->dbg_flag) { 1907 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1908 } 1909 } 1910 /* array to store whether a node is primal or not */ 1911 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1912 for (i=0;i<total_primal_vertices;i++) { 1913 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1914 } 1915 1916 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1917 /* -> using implicit ordering contained in temp_indices data */ 1918 total_counts = pcbddc->n_vertices; 1919 primal_counter = total_counts; 1920 while (total_counts<pcbddc->local_primal_size) { 1921 primal_dofs = 1; 1922 if (PetscBTLookup(change_basis,total_counts)) { 1923 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1924 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1925 primal_dofs++; 1926 } 1927 /* get constraint info */ 1928 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1929 dual_dofs = size_of_constraint-primal_dofs; 1930 1931 if (pcbddc->dbg_flag) { 1932 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1933 } 1934 1935 if (primal_dofs > 1) { /* QR */ 1936 1937 /* copy quadrature constraints for change of basis check */ 1938 if (pcbddc->dbg_flag) { 1939 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1940 } 1941 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1942 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1943 1944 /* compute QR decomposition of constraints */ 1945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1946 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1947 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1948 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1949 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1950 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1951 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1952 1953 /* explictly compute R^-T */ 1954 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1955 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1956 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1957 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1958 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1959 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1960 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1961 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1962 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1963 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1964 1965 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1966 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1968 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1969 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1970 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1971 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1972 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1973 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1974 1975 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1976 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1977 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1978 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1979 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1980 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1981 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1982 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1983 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1984 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1985 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1986 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1987 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1988 1989 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1990 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1991 /* insert cols for primal dofs */ 1992 for (j=0;j<primal_dofs;j++) { 1993 start_vals = &qr_basis[j*size_of_constraint]; 1994 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1995 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1996 } 1997 /* insert cols for dual dofs */ 1998 for (j=0,k=0;j<dual_dofs;k++) { 1999 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2000 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2001 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2002 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2003 j++; 2004 } 2005 } 2006 2007 /* check change of basis */ 2008 if (pcbddc->dbg_flag) { 2009 PetscInt ii,jj; 2010 PetscBool valid_qr=PETSC_TRUE; 2011 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2012 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2013 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2014 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2015 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2016 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2017 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2018 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2019 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2020 for (jj=0;jj<size_of_constraint;jj++) { 2021 for (ii=0;ii<primal_dofs;ii++) { 2022 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2023 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2024 } 2025 } 2026 if (!valid_qr) { 2027 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2028 for (jj=0;jj<size_of_constraint;jj++) { 2029 for (ii=0;ii<primal_dofs;ii++) { 2030 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2031 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2032 } 2033 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2034 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2035 } 2036 } 2037 } 2038 } else { 2039 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2040 } 2041 } 2042 } else { /* simple transformation block */ 2043 PetscInt row,col; 2044 PetscScalar val; 2045 for (j=0;j<size_of_constraint;j++) { 2046 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2047 if (!PetscBTLookup(is_primal,row)) { 2048 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2049 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2050 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2051 } else { 2052 for (k=0;k<size_of_constraint;k++) { 2053 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2054 if (row != col) { 2055 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2056 } else { 2057 val = 1.0; 2058 } 2059 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2060 } 2061 } 2062 } 2063 } 2064 /* increment primal counter */ 2065 primal_counter += primal_dofs; 2066 } else { 2067 if (pcbddc->dbg_flag) { 2068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2069 } 2070 } 2071 /* increment constraint counter total_counts */ 2072 total_counts += primal_dofs; 2073 } 2074 2075 /* free workspace */ 2076 if (qr_needed) { 2077 if (pcbddc->dbg_flag) { 2078 ierr = PetscFree(work);CHKERRQ(ierr); 2079 } 2080 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2081 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2082 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2083 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2084 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2085 } 2086 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2087 /* assembling */ 2088 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2089 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 /* 2091 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2092 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2093 */ 2094 } 2095 2096 /* get indices in local ordering for vertices and constraints */ 2097 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2098 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2099 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2100 } 2101 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2102 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2103 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2104 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2105 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2106 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2107 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2108 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2109 /* check if a new primal space has been introduced */ 2110 pcbddc->new_primal_space_local = PETSC_TRUE; 2111 if (olocal_primal_size == pcbddc->local_primal_size) { 2112 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2113 pcbddc->new_primal_space_local = !pcbddc->new_primal_space_local; 2114 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2115 } 2116 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2117 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2118 2119 /* flush dbg viewer */ 2120 if (pcbddc->dbg_flag) { 2121 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2122 } 2123 2124 /* free workspace */ 2125 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2126 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2127 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2128 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2129 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2130 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2131 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2132 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2133 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2139 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2140 { 2141 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2142 PC_IS *pcis = (PC_IS*)pc->data; 2143 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2144 PetscInt bs,ierr,i,vertex_size; 2145 PetscViewer viewer=pcbddc->dbg_viewer; 2146 2147 PetscFunctionBegin; 2148 /* Reset previously computed graph */ 2149 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2150 /* Init local Graph struct */ 2151 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2152 2153 /* Check validity of the csr graph passed in by the user */ 2154 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2155 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2156 } 2157 2158 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2159 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2160 Mat mat_adj; 2161 const PetscInt *xadj,*adjncy; 2162 PetscBool flg_row=PETSC_TRUE; 2163 2164 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2165 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2166 if (!flg_row) { 2167 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2168 } 2169 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2170 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2171 if (!flg_row) { 2172 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2173 } 2174 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2175 } 2176 2177 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2178 vertex_size = 1; 2179 if (!pcbddc->user_provided_isfordofs) { 2180 if (!pcbddc->n_ISForDofs) { 2181 IS *custom_ISForDofs; 2182 2183 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2184 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2185 for (i=0;i<bs;i++) { 2186 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2187 } 2188 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2189 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2190 /* remove my references to IS objects */ 2191 for (i=0;i<bs;i++) { 2192 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2193 } 2194 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2195 } 2196 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2197 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2198 } 2199 2200 /* Setup of Graph */ 2201 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2202 2203 /* Graph's connected components analysis */ 2204 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2205 2206 /* print some info to stdout */ 2207 if (pcbddc->dbg_flag) { 2208 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2209 } 2210 2211 /* mark topography has done */ 2212 pcbddc->recompute_topography = PETSC_FALSE; 2213 PetscFunctionReturn(0); 2214 } 2215 2216 #undef __FUNCT__ 2217 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2218 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2219 { 2220 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2221 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2222 PetscErrorCode ierr; 2223 2224 PetscFunctionBegin; 2225 n = 0; 2226 vertices = 0; 2227 if (pcbddc->ConstraintMatrix) { 2228 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2229 for (i=0;i<local_primal_size;i++) { 2230 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2231 if (size_of_constraint == 1) n++; 2232 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2233 } 2234 if (vertices_idx) { 2235 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2236 n = 0; 2237 for (i=0;i<local_primal_size;i++) { 2238 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2239 if (size_of_constraint == 1) { 2240 vertices[n++]=row_cmat_indices[0]; 2241 } 2242 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2243 } 2244 } 2245 } 2246 *n_vertices = n; 2247 if (vertices_idx) *vertices_idx = vertices; 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2253 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2254 { 2255 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2256 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2257 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2258 PetscBT touched; 2259 PetscErrorCode ierr; 2260 2261 /* This function assumes that the number of local constraints per connected component 2262 is not greater than the number of nodes defined for the connected component 2263 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2264 PetscFunctionBegin; 2265 n = 0; 2266 constraints_index = 0; 2267 if (pcbddc->ConstraintMatrix) { 2268 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2269 max_size_of_constraint = 0; 2270 for (i=0;i<local_primal_size;i++) { 2271 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2272 if (size_of_constraint > 1) { 2273 n++; 2274 } 2275 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2276 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2277 } 2278 if (constraints_idx) { 2279 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2280 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2281 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2282 n = 0; 2283 for (i=0;i<local_primal_size;i++) { 2284 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2285 if (size_of_constraint > 1) { 2286 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2287 /* find first untouched local node */ 2288 j = 0; 2289 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2290 min_index = row_cmat_global_indices[j]; 2291 min_loc = j; 2292 /* search the minimum among nodes not yet touched on the connected component 2293 since there can be more than one constraint on a single cc */ 2294 for (j=1;j<size_of_constraint;j++) { 2295 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2296 min_index = row_cmat_global_indices[j]; 2297 min_loc = j; 2298 } 2299 } 2300 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2301 constraints_index[n++] = row_cmat_indices[min_loc]; 2302 } 2303 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2304 } 2305 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2306 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2307 } 2308 } 2309 *n_constraints = n; 2310 if (constraints_idx) *constraints_idx = constraints_index; 2311 PetscFunctionReturn(0); 2312 } 2313 2314 /* the next two functions has been adapted from pcis.c */ 2315 #undef __FUNCT__ 2316 #define __FUNCT__ "PCBDDCApplySchur" 2317 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2318 { 2319 PetscErrorCode ierr; 2320 PC_IS *pcis = (PC_IS*)(pc->data); 2321 2322 PetscFunctionBegin; 2323 if (!vec2_B) { vec2_B = v; } 2324 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2325 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2326 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2327 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2328 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2329 PetscFunctionReturn(0); 2330 } 2331 2332 #undef __FUNCT__ 2333 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2334 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2335 { 2336 PetscErrorCode ierr; 2337 PC_IS *pcis = (PC_IS*)(pc->data); 2338 2339 PetscFunctionBegin; 2340 if (!vec2_B) { vec2_B = v; } 2341 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2342 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2343 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2344 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2345 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "PCBDDCSubsetNumbering" 2351 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2352 { 2353 Vec local_vec,global_vec; 2354 IS seqis,paris; 2355 VecScatter scatter_ctx; 2356 PetscScalar *array; 2357 PetscInt *temp_global_dofs; 2358 PetscScalar globalsum; 2359 PetscInt i,j,s; 2360 PetscInt nlocals,first_index,old_index,max_local; 2361 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2362 PetscMPIInt *dof_sizes,*dof_displs; 2363 PetscBool first_found; 2364 PetscErrorCode ierr; 2365 2366 PetscFunctionBegin; 2367 /* mpi buffers */ 2368 MPI_Comm_size(comm,&size_prec_comm); 2369 MPI_Comm_rank(comm,&rank_prec_comm); 2370 j = ( !rank_prec_comm ? size_prec_comm : 0); 2371 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2372 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2373 /* get maximum size of subset */ 2374 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2375 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2376 max_local = 0; 2377 if (n_local_dofs) { 2378 max_local = temp_global_dofs[0]; 2379 for (i=1;i<n_local_dofs;i++) { 2380 if (max_local < temp_global_dofs[i] ) { 2381 max_local = temp_global_dofs[i]; 2382 } 2383 } 2384 } 2385 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2386 max_global++; 2387 max_local = 0; 2388 if (n_local_dofs) { 2389 max_local = local_dofs[0]; 2390 for (i=1;i<n_local_dofs;i++) { 2391 if (max_local < local_dofs[i] ) { 2392 max_local = local_dofs[i]; 2393 } 2394 } 2395 } 2396 max_local++; 2397 /* allocate workspace */ 2398 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2399 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2400 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2401 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2402 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2403 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2404 /* create scatter */ 2405 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2406 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2407 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2408 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2409 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2410 /* init array */ 2411 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2412 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2413 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2414 if (local_dofs_mult) { 2415 for (i=0;i<n_local_dofs;i++) { 2416 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2417 } 2418 } else { 2419 for (i=0;i<n_local_dofs;i++) { 2420 array[local_dofs[i]]=1.0; 2421 } 2422 } 2423 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2424 /* scatter into global vec and get total number of global dofs */ 2425 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2426 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2427 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2428 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2429 /* Fill global_vec with cumulative function for global numbering */ 2430 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2431 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2432 nlocals = 0; 2433 first_index = -1; 2434 first_found = PETSC_FALSE; 2435 for (i=0;i<s;i++) { 2436 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2437 first_found = PETSC_TRUE; 2438 first_index = i; 2439 } 2440 nlocals += (PetscInt)PetscRealPart(array[i]); 2441 } 2442 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2443 if (!rank_prec_comm) { 2444 dof_displs[0]=0; 2445 for (i=1;i<size_prec_comm;i++) { 2446 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2447 } 2448 } 2449 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2450 if (first_found) { 2451 array[first_index] += (PetscScalar)nlocals; 2452 old_index = first_index; 2453 for (i=first_index+1;i<s;i++) { 2454 if (PetscRealPart(array[i]) > 0.0) { 2455 array[i] += array[old_index]; 2456 old_index = i; 2457 } 2458 } 2459 } 2460 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2461 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2462 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2463 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2464 /* get global ordering of local dofs */ 2465 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2466 if (local_dofs_mult) { 2467 for (i=0;i<n_local_dofs;i++) { 2468 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2469 } 2470 } else { 2471 for (i=0;i<n_local_dofs;i++) { 2472 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2473 } 2474 } 2475 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2476 /* free workspace */ 2477 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2478 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2479 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2480 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2481 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2482 /* return pointer to global ordering of local dofs */ 2483 *global_numbering_subset = temp_global_dofs; 2484 PetscFunctionReturn(0); 2485 } 2486 2487 #undef __FUNCT__ 2488 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2489 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2490 { 2491 PetscInt i,j; 2492 PetscScalar *alphas; 2493 PetscErrorCode ierr; 2494 2495 PetscFunctionBegin; 2496 /* this implements stabilized Gram-Schmidt */ 2497 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2498 for (i=0;i<n;i++) { 2499 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2500 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2501 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2502 } 2503 ierr = PetscFree(alphas);CHKERRQ(ierr); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 /* TODO 2508 - now preallocation is done assuming SEQDENSE local matrices 2509 */ 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatISGetMPIXAIJ" 2512 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2513 { 2514 Mat new_mat; 2515 Mat_IS *matis = (Mat_IS*)(mat->data); 2516 /* info on mat */ 2517 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2518 PetscInt bs,rows,cols; 2519 PetscInt lrows,lcols; 2520 PetscInt local_rows,local_cols; 2521 PetscBool isdense; 2522 /* values insertion */ 2523 PetscScalar *array; 2524 PetscInt *local_indices,*global_indices; 2525 /* work */ 2526 PetscInt i,j,index_row; 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 /* MISSING CHECKS 2531 - rectangular case not covered (it is not allowed by MATIS) 2532 */ 2533 /* get info from mat */ 2534 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2535 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2536 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2537 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2538 2539 /* work */ 2540 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2541 for (i=0;i<local_rows;i++) local_indices[i]=i; 2542 /* map indices of local mat to global values */ 2543 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2544 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2545 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2546 2547 if (reuse==MAT_INITIAL_MATRIX) { 2548 Vec vec_dnz,vec_onz; 2549 PetscScalar *my_dnz,*my_onz; 2550 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2551 PetscInt index_col,owner; 2552 PetscMPIInt nsubdomains; 2553 2554 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2555 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2556 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2557 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2558 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2559 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2560 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2561 2562 /* 2563 preallocation 2564 */ 2565 2566 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2567 /* 2568 Some vectors are needed to sum up properly on shared interface dofs. 2569 Preallocation macros cannot do the job. 2570 Note that preallocation is not exact, since it overestimates nonzeros 2571 */ 2572 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2573 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2574 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2575 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2576 /* All processes need to compute entire row ownership */ 2577 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2578 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2579 for (i=0;i<nsubdomains;i++) { 2580 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2581 row_ownership[j]=i; 2582 } 2583 } 2584 2585 /* 2586 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2587 then, they will be summed up properly. This way, preallocation is always sufficient 2588 */ 2589 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2590 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2591 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2592 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2593 for (i=0;i<local_rows;i++) { 2594 index_row = global_indices[i]; 2595 for (j=i;j<local_rows;j++) { 2596 owner = row_ownership[index_row]; 2597 index_col = global_indices[j]; 2598 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2599 my_dnz[i] += 1.0; 2600 } else { /* offdiag block */ 2601 my_onz[i] += 1.0; 2602 } 2603 /* same as before, interchanging rows and cols */ 2604 if (i != j) { 2605 owner = row_ownership[index_col]; 2606 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2607 my_dnz[j] += 1.0; 2608 } else { 2609 my_onz[j] += 1.0; 2610 } 2611 } 2612 } 2613 } 2614 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2615 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2616 if (local_rows) { /* multilevel guard */ 2617 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2618 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2619 } 2620 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2621 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2622 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2623 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2624 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2625 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2626 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2627 2628 /* set computed preallocation in dnz and onz */ 2629 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2630 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2631 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2632 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2633 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2634 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2635 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2636 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2637 2638 /* Resize preallocation if overestimated */ 2639 for (i=0;i<lrows;i++) { 2640 dnz[i] = PetscMin(dnz[i],lcols); 2641 onz[i] = PetscMin(onz[i],cols-lcols); 2642 } 2643 /* set preallocation */ 2644 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2645 for (i=0;i<lrows/bs;i++) { 2646 dnz[i] = dnz[i*bs]/bs; 2647 onz[i] = onz[i*bs]/bs; 2648 } 2649 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2650 for (i=0;i<lrows/bs;i++) { 2651 dnz[i] = dnz[i]-i; 2652 } 2653 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2654 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2655 *M = new_mat; 2656 } else { 2657 PetscInt mbs,mrows,mcols; 2658 /* some checks */ 2659 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2660 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2661 if (mrows != rows) { 2662 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2663 } 2664 if (mrows != rows) { 2665 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2666 } 2667 if (mbs != bs) { 2668 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2669 } 2670 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2671 } 2672 /* set local to global mappings */ 2673 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2674 /* Set values */ 2675 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2676 if (isdense) { /* special case for dense local matrices */ 2677 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2678 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2679 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2680 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2681 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2682 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2683 } else { /* very basic values insertion for all other matrix types */ 2684 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2685 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2686 for (i=0;i<local_rows;i++) { 2687 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2688 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2689 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2690 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2691 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2692 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2693 } 2694 } 2695 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2696 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2697 if (isdense) { 2698 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2699 } 2700 PetscFunctionReturn(0); 2701 } 2702 2703 #undef __FUNCT__ 2704 #define __FUNCT__ "MatISSubassemble_Private" 2705 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2706 { 2707 Mat subdomain_adj; 2708 IS new_ranks,ranks_send_to; 2709 MatPartitioning partitioner; 2710 Mat_IS *matis; 2711 PetscInt n_neighs,*neighs,*n_shared,**shared; 2712 PetscInt prank; 2713 PetscMPIInt size,rank,color; 2714 PetscInt *xadj,*adjncy,*oldranks; 2715 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2716 PetscInt i,j,n_subdomains,local_size,threshold=0; 2717 PetscErrorCode ierr; 2718 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2719 PetscSubcomm subcomm; 2720 2721 PetscFunctionBegin; 2722 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2723 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2724 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2725 2726 /* Get info on mapping */ 2727 matis = (Mat_IS*)(mat->data); 2728 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2729 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2730 2731 /* build local CSR graph of subdomains' connectivity */ 2732 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2733 xadj[0] = 0; 2734 xadj[1] = PetscMax(n_neighs-1,0); 2735 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2736 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2737 2738 if (threshold) { 2739 PetscInt* count,min_threshold; 2740 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2741 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2742 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2743 for (j=0;j<n_shared[i];j++) { 2744 count[shared[i][j]] += 1; 2745 } 2746 } 2747 /* adapt threshold since we dont want to lose significant connections */ 2748 min_threshold = n_neighs; 2749 for (i=1;i<n_neighs;i++) { 2750 for (j=0;j<n_shared[i];j++) { 2751 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2752 } 2753 } 2754 threshold = PetscMax(min_threshold+1,threshold); 2755 xadj[1] = 0; 2756 for (i=1;i<n_neighs;i++) { 2757 for (j=0;j<n_shared[i];j++) { 2758 if (count[shared[i][j]] < threshold) { 2759 adjncy[xadj[1]] = neighs[i]; 2760 adjncy_wgt[xadj[1]] = n_shared[i]; 2761 xadj[1]++; 2762 break; 2763 } 2764 } 2765 } 2766 ierr = PetscFree(count);CHKERRQ(ierr); 2767 } else { 2768 if (xadj[1]) { 2769 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2770 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2771 } 2772 } 2773 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2774 if (use_square) { 2775 for (i=0;i<xadj[1];i++) { 2776 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2777 } 2778 } 2779 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2780 2781 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2782 2783 /* 2784 Restrict work on active processes only. 2785 */ 2786 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2787 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2788 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2789 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2790 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2791 if (color) { 2792 ierr = PetscFree(xadj);CHKERRQ(ierr); 2793 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2794 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2795 } else { 2796 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2797 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2798 prank = rank; 2799 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2800 for (i=0;i<size;i++) { 2801 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2802 } 2803 for (i=0;i<xadj[1];i++) { 2804 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2805 } 2806 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2807 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2808 n_subdomains = (PetscInt)size; 2809 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2810 2811 /* Partition */ 2812 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2813 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2814 if (use_vwgt) { 2815 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2816 v_wgt[0] = local_size; 2817 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2818 } 2819 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2820 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2821 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2822 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2823 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2824 2825 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2826 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2827 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2828 /* clean up */ 2829 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2830 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2831 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2832 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2833 } 2834 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2835 2836 /* assemble parallel IS for sends */ 2837 i = 1; 2838 if (color) i=0; 2839 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2840 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2841 2842 /* get back IS */ 2843 *is_sends = ranks_send_to; 2844 PetscFunctionReturn(0); 2845 } 2846 2847 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2848 2849 #undef __FUNCT__ 2850 #define __FUNCT__ "MatISSubassemble" 2851 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2852 { 2853 Mat local_mat,new_mat; 2854 Mat_IS *matis; 2855 IS is_sends_internal; 2856 PetscInt rows,cols; 2857 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2858 PetscBool ismatis,isdense; 2859 ISLocalToGlobalMapping l2gmap; 2860 PetscInt* l2gmap_indices; 2861 const PetscInt* is_indices; 2862 MatType new_local_type; 2863 MatTypePrivate new_local_type_private; 2864 /* buffers */ 2865 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2866 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2867 /* MPI */ 2868 MPI_Comm comm; 2869 PetscMPIInt n_sends,n_recvs,commsize; 2870 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2871 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2872 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2873 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2874 PetscErrorCode ierr; 2875 2876 PetscFunctionBegin; 2877 /* checks */ 2878 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2879 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2880 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2881 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2882 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2883 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2884 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2885 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2886 PetscValidLogicalCollectiveInt(mat,bs,0); 2887 /* prepare IS for sending if not provided */ 2888 if (!is_sends) { 2889 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2890 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2891 } else { 2892 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2893 is_sends_internal = is_sends; 2894 } 2895 2896 /* get pointer of MATIS data */ 2897 matis = (Mat_IS*)mat->data; 2898 2899 /* get comm */ 2900 comm = PetscObjectComm((PetscObject)mat); 2901 2902 /* compute number of sends */ 2903 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2904 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2905 2906 /* compute number of receives */ 2907 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2908 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2909 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2910 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2911 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2912 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2913 ierr = PetscFree(iflags);CHKERRQ(ierr); 2914 2915 /* prepare send/receive buffers */ 2916 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2917 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2918 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2919 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2920 2921 /* Get data from local mat */ 2922 if (!isdense) { 2923 /* TODO: See below some guidelines on how to prepare the local buffers */ 2924 /* 2925 send_buffer_vals should contain the raw values of the local matrix 2926 send_buffer_idxs should contain: 2927 - MatType_PRIVATE type 2928 - PetscInt size_of_l2gmap 2929 - PetscInt global_row_indices[size_of_l2gmap] 2930 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2931 */ 2932 } else { 2933 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2934 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2935 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2936 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2937 send_buffer_idxs[1] = i; 2938 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2939 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2940 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2941 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2942 for (i=0;i<n_sends;i++) { 2943 ilengths_vals[is_indices[i]] = len*len; 2944 ilengths_idxs[is_indices[i]] = len+2; 2945 } 2946 } 2947 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2948 buf_size_idxs = 0; 2949 buf_size_vals = 0; 2950 for (i=0;i<n_recvs;i++) { 2951 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2952 buf_size_vals += (PetscInt)olengths_vals[i]; 2953 } 2954 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2955 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2956 2957 /* get new tags for clean communications */ 2958 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2959 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2960 2961 /* allocate for requests */ 2962 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2963 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2964 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2965 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2966 2967 /* communications */ 2968 ptr_idxs = recv_buffer_idxs; 2969 ptr_vals = recv_buffer_vals; 2970 for (i=0;i<n_recvs;i++) { 2971 source_dest = onodes[i]; 2972 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2973 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2974 ptr_idxs += olengths_idxs[i]; 2975 ptr_vals += olengths_vals[i]; 2976 } 2977 for (i=0;i<n_sends;i++) { 2978 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2979 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2980 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2981 } 2982 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2983 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2984 2985 /* assemble new l2g map */ 2986 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2987 ptr_idxs = recv_buffer_idxs; 2988 buf_size_idxs = 0; 2989 for (i=0;i<n_recvs;i++) { 2990 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2991 ptr_idxs += olengths_idxs[i]; 2992 } 2993 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2994 ptr_idxs = recv_buffer_idxs; 2995 buf_size_idxs = 0; 2996 for (i=0;i<n_recvs;i++) { 2997 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2998 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2999 ptr_idxs += olengths_idxs[i]; 3000 } 3001 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3002 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3003 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3004 3005 /* infer new local matrix type from received local matrices type */ 3006 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3007 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3008 new_local_type_private = MATAIJ_PRIVATE; 3009 new_local_type = MATSEQAIJ; 3010 if (n_recvs) { 3011 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3012 ptr_idxs = recv_buffer_idxs; 3013 for (i=0;i<n_recvs;i++) { 3014 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3015 new_local_type_private = MATAIJ_PRIVATE; 3016 break; 3017 } 3018 ptr_idxs += olengths_idxs[i]; 3019 } 3020 switch (new_local_type_private) { 3021 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 3022 new_local_type = MATSEQAIJ; 3023 bs = 1; 3024 break; 3025 case MATAIJ_PRIVATE: 3026 new_local_type = MATSEQAIJ; 3027 bs = 1; 3028 break; 3029 case MATBAIJ_PRIVATE: 3030 new_local_type = MATSEQBAIJ; 3031 break; 3032 case MATSBAIJ_PRIVATE: 3033 new_local_type = MATSEQSBAIJ; 3034 break; 3035 default: 3036 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3037 break; 3038 } 3039 } 3040 3041 /* create MATIS object */ 3042 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3043 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 3044 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3045 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 3046 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3047 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3048 3049 /* set values */ 3050 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3051 ptr_vals = recv_buffer_vals; 3052 ptr_idxs = recv_buffer_idxs; 3053 for (i=0;i<n_recvs;i++) { 3054 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3055 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3056 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3057 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3058 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3059 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3060 } 3061 ptr_idxs += olengths_idxs[i]; 3062 ptr_vals += olengths_vals[i]; 3063 } 3064 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3065 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3066 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3068 3069 { /* check */ 3070 Vec lvec,rvec; 3071 PetscReal infty_error; 3072 3073 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3074 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3075 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3076 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3077 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 3078 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3079 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3080 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3081 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3082 } 3083 3084 /* free workspace */ 3085 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3086 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3087 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3088 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3089 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3090 if (isdense) { 3091 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3092 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3093 } else { 3094 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3095 } 3096 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3097 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3098 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3099 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3100 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3101 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3102 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3103 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3104 ierr = PetscFree(onodes);CHKERRQ(ierr); 3105 /* get back new mat */ 3106 *mat_n = new_mat; 3107 PetscFunctionReturn(0); 3108 } 3109 3110 #undef __FUNCT__ 3111 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3112 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3113 { 3114 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3115 PC_IS *pcis = (PC_IS*)pc->data; 3116 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3117 MatNullSpace CoarseNullSpace=NULL; 3118 ISLocalToGlobalMapping coarse_islg; 3119 IS coarse_is; 3120 PetscInt max_it; 3121 PetscInt im_active=-1,active_procs=-1; 3122 PC pc_temp; 3123 PCType coarse_pc_type; 3124 KSPType coarse_ksp_type; 3125 PetscBool multilevel_requested,multilevel_allowed; 3126 PetscBool setsym,issym,isbddc,isnn,coarse_reuse; 3127 MatStructure matstruct; 3128 PetscErrorCode ierr; 3129 3130 PetscFunctionBegin; 3131 /* Assign global numbering to coarse dofs */ 3132 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3133 PetscInt ocoarse_size; 3134 ocoarse_size = pcbddc->coarse_size; 3135 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3136 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3137 /* see if we can avoid some work */ 3138 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3139 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3140 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3141 coarse_reuse = PETSC_FALSE; 3142 } else { /* we can safely reuse already computed coarse matrix */ 3143 coarse_reuse = PETSC_TRUE; 3144 } 3145 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3146 coarse_reuse = PETSC_FALSE; 3147 } 3148 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3149 coarse_reuse = PETSC_TRUE; 3150 } 3151 3152 /* infer some info from user */ 3153 issym = PETSC_FALSE; 3154 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3155 multilevel_allowed = PETSC_FALSE; 3156 multilevel_requested = PETSC_FALSE; 3157 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3158 if (multilevel_requested) { 3159 /* count "active processes" */ 3160 im_active = !!(pcis->n); 3161 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3162 if (active_procs/pcbddc->coarsening_ratio < 2) { 3163 multilevel_allowed = PETSC_FALSE; 3164 } else { 3165 multilevel_allowed = PETSC_TRUE; 3166 } 3167 } 3168 3169 /* set defaults for coarse KSP and PC */ 3170 if (multilevel_allowed) { 3171 if (issym) { 3172 coarse_ksp_type = KSPRICHARDSON; 3173 } else { 3174 coarse_ksp_type = KSPCHEBYSHEV; 3175 } 3176 coarse_pc_type = PCBDDC; 3177 } else { 3178 coarse_ksp_type = KSPPREONLY; 3179 coarse_pc_type = PCREDUNDANT; 3180 } 3181 3182 /* create the coarse KSP object only once with defaults */ 3183 if (!pcbddc->coarse_ksp) { 3184 char prefix[256],str_level[3]; 3185 size_t len; 3186 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3187 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3188 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3189 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3190 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3191 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3192 /* prefix */ 3193 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3194 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3195 if (!pcbddc->current_level) { 3196 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3197 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3198 } else { 3199 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3200 if (pcbddc->current_level>1) len -= 2; 3201 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3202 *(prefix+len)='\0'; 3203 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3204 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3205 } 3206 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3207 } 3208 /* allow user customization */ 3209 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3210 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3211 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3212 3213 /* get some info after set from options */ 3214 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3215 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3216 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3217 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3218 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3219 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3220 isbddc = PETSC_FALSE; 3221 } 3222 3223 /* propagate BDDC info to the next level */ 3224 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3225 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3226 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3227 3228 /* creates temporary MATIS object for coarse matrix */ 3229 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3230 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3231 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3232 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3233 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3234 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3235 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3236 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3237 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3238 3239 /* assemble coarse matrix */ 3240 if (isbddc || isnn) { 3241 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3242 } else { 3243 if (coarse_reuse) { 3244 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3245 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3246 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3247 } else { 3248 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3249 } 3250 } 3251 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3252 3253 /* create local to global scatters for coarse problem */ 3254 if (pcbddc->new_primal_space) { 3255 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3256 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3257 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3258 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3259 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3260 } 3261 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3262 3263 /* propagate symmetry info to coarse matrix */ 3264 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3265 3266 /* Compute coarse null space (special handling by BDDC only) */ 3267 if (pcbddc->NullSpace) { 3268 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3269 if (isbddc) { 3270 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3271 } else { 3272 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3273 } 3274 } 3275 3276 /* set operators */ 3277 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3278 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3279 3280 /* additional KSP customization */ 3281 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3282 if (max_it < 5) { 3283 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3284 } 3285 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3286 3287 3288 /* print some info if requested */ 3289 if (pcbddc->dbg_flag) { 3290 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3291 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3292 if (!multilevel_allowed) { 3293 if (multilevel_requested) { 3294 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3295 } else if (pcbddc->max_levels) { 3296 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3297 } 3298 } 3299 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3300 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3301 } 3302 3303 /* setup coarse ksp */ 3304 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3305 if (pcbddc->dbg_flag) { 3306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3307 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3308 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3310 } 3311 3312 /* Check coarse problem if requested */ 3313 if (pcbddc->dbg_flag) { 3314 KSP check_ksp; 3315 KSPType check_ksp_type; 3316 PC check_pc; 3317 Vec check_vec; 3318 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3319 PetscInt its; 3320 PetscBool ispreonly,compute; 3321 3322 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3323 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3324 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3325 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3326 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3327 if (ispreonly) { 3328 check_ksp_type = KSPPREONLY; 3329 compute = PETSC_FALSE; 3330 } else { 3331 if (issym) check_ksp_type = KSPCG; 3332 else check_ksp_type = KSPGMRES; 3333 compute = PETSC_TRUE; 3334 } 3335 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3336 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3337 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3338 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3339 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3340 /* create random vec */ 3341 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3342 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3343 if (CoarseNullSpace) { 3344 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3345 } 3346 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3347 /* solve coarse problem */ 3348 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3349 if (CoarseNullSpace) { 3350 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3351 } 3352 /* check coarse problem residual error */ 3353 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3354 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3355 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3356 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3357 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3358 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3359 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3360 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3361 /* get eigenvalue estimation if preonly has not been requested */ 3362 if (!ispreonly) { 3363 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3364 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3365 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3366 } 3367 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3368 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3369 } 3370 /* free memory */ 3371 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3372 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3373 PetscFunctionReturn(0); 3374 } 3375 3376 #undef __FUNCT__ 3377 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3378 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3379 { 3380 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3381 PC_IS* pcis = (PC_IS*)pc->data; 3382 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3383 PetscInt i,coarse_size; 3384 PetscInt *local_primal_indices; 3385 PetscErrorCode ierr; 3386 3387 PetscFunctionBegin; 3388 /* Compute global number of coarse dofs */ 3389 if (!pcbddc->primal_indices_local_idxs) { 3390 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 3391 } 3392 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3393 3394 /* check numbering */ 3395 if (pcbddc->dbg_flag) { 3396 PetscScalar coarsesum,*array; 3397 3398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3399 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3400 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3401 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3402 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3403 for (i=0;i<pcbddc->local_primal_size;i++) { 3404 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3405 } 3406 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3407 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3408 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3409 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3410 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3411 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3412 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3413 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3414 for (i=0;i<pcis->n;i++) { 3415 if (array[i] == 1.0) { 3416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3417 } 3418 } 3419 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3420 for (i=0;i<pcis->n;i++) { 3421 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3422 } 3423 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3424 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3425 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3426 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3427 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3428 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3429 if (pcbddc->dbg_flag > 1) { 3430 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3431 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3432 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3433 for (i=0;i<pcbddc->local_primal_size;i++) { 3434 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3435 } 3436 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3437 } 3438 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3439 } 3440 /* get back data */ 3441 *coarse_size_n = coarse_size; 3442 *local_primal_indices_n = local_primal_indices; 3443 PetscFunctionReturn(0); 3444 } 3445 3446