1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 23 24 /* Change global null space passed in by the user if change of basis has been requested */ 25 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 26 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 27 } 28 29 /* 30 Setup local correction and local part of coarse basis. 31 Gives back the dense local part of the coarse matrix in column major ordering 32 */ 33 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 34 35 /* Compute total number of coarse nodes and setup coarse solver */ 36 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 37 38 /* free */ 39 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 40 PetscFunctionReturn(0); 41 } 42 43 #undef __FUNCT__ 44 #define __FUNCT__ "PCBDDCResetCustomization" 45 PetscErrorCode PCBDDCResetCustomization(PC pc) 46 { 47 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 48 PetscInt i; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 57 for (i=0;i<pcbddc->n_ISForDofs;i++) { 58 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 59 } 60 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 61 pcbddc->n_ISForDofs = 0; 62 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 63 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 117 { 118 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 119 PC_IS *pcis = (PC_IS*)pc->data; 120 VecType impVecType; 121 PetscInt n_vertices,n_constraints,local_primal_size,n_R,old_size; 122 PetscErrorCode ierr; 123 124 PetscFunctionBegin; 125 if (!pcbddc->ConstraintMatrix) { 126 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 127 } 128 /* get sizes */ 129 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,NULL);CHKERRQ(ierr); 130 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 131 n_constraints = local_primal_size - n_vertices; 132 n_R = pcis->n-n_vertices; 133 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 134 /* local work vectors (try to avoid unneeded work)*/ 135 /* R nodes */ 136 old_size = -1; 137 if (pcbddc->vec1_R) { 138 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 139 } 140 if (n_R != old_size) { 141 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 143 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 144 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 145 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 146 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 147 } 148 /* local primal dofs */ 149 old_size = -1; 150 if (pcbddc->vec1_P) { 151 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 152 } 153 if (local_primal_size != old_size) { 154 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 157 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 158 } 159 /* local explicit constraints */ 160 old_size = -1; 161 if (pcbddc->vec1_C) { 162 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 163 } 164 if (n_constraints && n_constraints != old_size) { 165 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 168 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "PCBDDCSetUpCorrection" 175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 176 { 177 PetscErrorCode ierr; 178 /* pointers to pcis and pcbddc */ 179 PC_IS* pcis = (PC_IS*)pc->data; 180 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 181 /* submatrices of local problem */ 182 Mat A_RV,A_VR,A_VV; 183 /* working matrices */ 184 Mat M1,M2,M3,C_CR; 185 /* working vectors */ 186 Vec vec1_C,vec2_C,vec1_V,vec2_V; 187 /* additional working stuff */ 188 IS is_aux; 189 ISLocalToGlobalMapping BtoNmap; 190 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 191 const PetscScalar *array,*row_cmat_values; 192 const PetscInt *row_cmat_indices,*idx_R_local; 193 PetscInt *vertices,*idx_V_B,*auxindices; 194 PetscInt n_vertices,n_constraints,size_of_constraint; 195 PetscInt i,j,n_R,n_D,n_B; 196 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 197 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 198 MatType impMatType; 199 /* some shortcuts to scalars */ 200 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 201 /* for debugging purposes */ 202 PetscReal *coarsefunctions_errors,*constraints_errors; 203 204 PetscFunctionBegin; 205 /* get number of vertices and their local indices */ 206 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 207 n_constraints = pcbddc->local_primal_size-n_vertices; 208 /* Set Non-overlapping dimensions */ 209 n_B = pcis->n_B; n_D = pcis->n - n_B; 210 n_R = pcis->n-n_vertices; 211 212 /* Set types for local objects needed by BDDC precondtioner */ 213 impMatType = MATSEQDENSE; 214 215 /* Allocating some extra storage just to be safe */ 216 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 217 for (i=0;i<pcis->n;i++) auxindices[i]=i; 218 219 /* vertices in boundary numbering */ 220 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 221 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 222 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 223 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 224 if (i != n_vertices) { 225 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 226 } 227 228 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 229 if (n_constraints) { 230 /* see if we can save some allocations */ 231 if (pcbddc->local_auxmat2) { 232 PetscInt on_R,on_constraints; 233 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 234 if (on_R != n_R || on_constraints != n_constraints) { 235 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 236 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 237 } 238 } 239 /* work vectors */ 240 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 241 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 242 /* auxiliary matrices */ 243 if (!pcbddc->local_auxmat2) { 244 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 245 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 246 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 247 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 248 } 249 250 /* Extract constraints on R nodes: C_{CR} */ 251 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 252 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 253 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 254 255 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 256 for (i=0;i<n_constraints;i++) { 257 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 258 /* Get row of constraint matrix in R numbering */ 259 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 260 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 261 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 262 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 263 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 264 /* Solve for row of constraint matrix in R numbering */ 265 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 266 /* Set values in local_auxmat2 */ 267 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 268 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 269 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 270 } 271 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 272 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 274 275 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 276 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 277 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 278 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 279 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 280 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 281 ierr = MatSetUp(M1);CHKERRQ(ierr); 282 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 283 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 284 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 285 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 286 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 287 ierr = MatDestroy(&M2);CHKERRQ(ierr); 288 ierr = MatDestroy(&M3);CHKERRQ(ierr); 289 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 290 if (!pcbddc->local_auxmat1) { 291 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 292 } else { 293 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 294 } 295 } 296 297 /* Get submatrices from subdomain matrix */ 298 if (n_vertices) { 299 PetscInt ibs,mbs; 300 PetscBool issbaij; 301 Mat newmat; 302 303 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 304 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 305 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 306 if (ibs != mbs) { /* need to convert to SEQAIJ */ 307 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 310 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 311 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 312 } else { 313 /* this is safe */ 314 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 315 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 316 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 317 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 318 /* which of the two approaches is faster? */ 319 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 320 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 321 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 322 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 323 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 324 } else { 325 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 326 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 327 } 328 } 329 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 330 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 331 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 332 } 333 334 /* Matrix of coarse basis functions (local) */ 335 if (pcbddc->coarse_phi_B) { 336 PetscInt on_B,on_primal; 337 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 338 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 339 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 340 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 341 } 342 } 343 if (pcbddc->coarse_phi_D) { 344 PetscInt on_D,on_primal; 345 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 346 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 347 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 348 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 349 } 350 } 351 if (!pcbddc->coarse_phi_B) { 352 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 353 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 354 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 355 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 356 } 357 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 358 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 359 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 360 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 361 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 362 } 363 364 if (pcbddc->dbg_flag) { 365 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 366 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 367 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 368 } 369 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 370 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 371 372 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 373 374 /* vertices */ 375 for (i=0;i<n_vertices;i++) { 376 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 377 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 378 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 379 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 380 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 381 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 382 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 383 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 384 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 386 if (n_constraints) { 387 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 388 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 389 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 390 } 391 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 392 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 393 394 /* Set values in coarse basis function and subdomain part of coarse_mat */ 395 /* coarse basis functions */ 396 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 397 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 398 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 399 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 400 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 401 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 402 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 403 if (pcbddc->switch_static || pcbddc->dbg_flag) { 404 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 405 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 408 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 409 } 410 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 411 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 412 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 414 if (n_constraints) { 415 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 416 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 417 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 418 } 419 420 /* check */ 421 if (pcbddc->dbg_flag) { 422 /* assemble subdomain vector on local nodes */ 423 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 424 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 425 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 427 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 428 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 429 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 430 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 431 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 432 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 433 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 435 if (n_constraints) { 436 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 437 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 438 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 439 } 440 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 441 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 442 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 443 /* check saddle point solution */ 444 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 445 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 446 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 447 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 448 /* shift by the identity matrix */ 449 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 450 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 453 } 454 } 455 456 /* constraints */ 457 for (i=0;i<n_constraints;i++) { 458 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 459 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 460 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 461 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 462 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 463 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 464 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 465 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 466 if (n_vertices) { 467 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 468 } 469 /* Set values in coarse basis function and subdomain part of coarse_mat */ 470 /* coarse basis functions */ 471 j = i+n_vertices; /* don't touch this! */ 472 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 473 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 474 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 475 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 476 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 477 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 478 if (pcbddc->switch_static || pcbddc->dbg_flag) { 479 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 481 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 482 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 483 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 484 } 485 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 486 if (n_vertices) { 487 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 488 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 490 } 491 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 492 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 493 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 494 495 if (pcbddc->dbg_flag) { 496 /* assemble subdomain vector on nodes */ 497 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 498 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 499 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 500 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 501 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 502 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 503 /* assemble subdomain vector of lagrange multipliers */ 504 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 505 if (n_vertices) { 506 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 507 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 509 } 510 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 511 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 512 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 513 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 514 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 515 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 516 /* check saddle point solution */ 517 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 518 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 519 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 520 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 521 /* shift by the identity matrix */ 522 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 523 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 524 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 525 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 526 } 527 } 528 /* call assembling routines for local coarse basis */ 529 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 if (pcbddc->switch_static || pcbddc->dbg_flag) { 532 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 533 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 534 } 535 536 /* compute other basis functions for non-symmetric problems */ 537 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 538 if (!setsym || (setsym && !issym)) { 539 if (!pcbddc->coarse_psi_B) { 540 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 541 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 542 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 543 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 544 } 545 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 546 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 547 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 548 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 549 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 550 } 551 for (i=0;i<pcbddc->local_primal_size;i++) { 552 if (n_constraints) { 553 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 554 for (j=0;j<n_constraints;j++) { 555 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 556 } 557 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 558 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 559 } 560 if (i<n_vertices) { 561 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 562 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 563 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 564 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 565 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 566 if (n_constraints) { 567 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 568 } 569 } else { 570 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 571 } 572 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 573 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 574 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 575 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 576 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 577 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 578 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 579 if (i<n_vertices) { 580 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 581 } 582 if (pcbddc->switch_static || pcbddc->dbg_flag) { 583 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 584 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 585 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 586 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 587 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 588 } 589 590 if (pcbddc->dbg_flag) { 591 /* assemble subdomain vector on nodes */ 592 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 593 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 594 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 595 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 596 if (i<n_vertices) { 597 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 598 } 599 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 600 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 601 /* assemble subdomain vector of lagrange multipliers */ 602 for (j=0;j<pcbddc->local_primal_size;j++) { 603 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 604 } 605 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 606 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 607 /* check saddle point solution */ 608 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 609 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 610 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 611 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 612 /* shift by the identity matrix */ 613 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 614 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 615 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 616 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 617 } 618 } 619 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 if (pcbddc->switch_static || pcbddc->dbg_flag) { 622 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 623 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 624 } 625 } 626 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 627 /* Checking coarse_sub_mat and coarse basis functios */ 628 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 629 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 630 if (pcbddc->dbg_flag) { 631 Mat coarse_sub_mat; 632 Mat AUXMAT,TM1,TM2,TM3,TM4; 633 Mat coarse_phi_D,coarse_phi_B; 634 Mat coarse_psi_D,coarse_psi_B; 635 Mat A_II,A_BB,A_IB,A_BI; 636 MatType checkmattype=MATSEQAIJ; 637 PetscReal real_value; 638 639 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 640 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 641 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 642 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 643 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 644 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 645 if (pcbddc->coarse_psi_B) { 646 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 647 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 648 } 649 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 650 651 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 652 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 653 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 654 if (pcbddc->coarse_psi_B) { 655 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 656 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 657 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 658 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 659 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 660 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 661 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 662 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 663 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 664 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 665 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 666 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 667 } else { 668 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 669 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 670 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 671 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 672 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 673 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 674 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 675 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 676 } 677 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 678 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 679 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 680 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 681 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 682 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 685 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 686 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 687 for (i=0;i<pcbddc->local_primal_size;i++) { 688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 689 } 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 691 for (i=0;i<pcbddc->local_primal_size;i++) { 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 693 } 694 if (pcbddc->coarse_psi_B) { 695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 696 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 698 } 699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 700 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 702 } 703 } 704 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 705 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 706 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 707 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 708 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 709 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 710 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 711 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 712 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 713 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 714 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 715 if (pcbddc->coarse_psi_B) { 716 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 717 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 718 } 719 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 720 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 721 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 722 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 723 } 724 /* free memory */ 725 if (n_vertices) { 726 ierr = PetscFree(vertices);CHKERRQ(ierr); 727 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 728 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 730 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 731 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 732 } 733 if (n_constraints) { 734 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 735 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 736 ierr = MatDestroy(&M1);CHKERRQ(ierr); 737 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 738 } 739 ierr = PetscFree(auxindices);CHKERRQ(ierr); 740 /* get back data */ 741 *coarse_submat_vals_n = coarse_submat_vals; 742 PetscFunctionReturn(0); 743 } 744 745 #undef __FUNCT__ 746 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 747 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 748 { 749 PC_IS* pcis = (PC_IS*)(pc->data); 750 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 751 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 752 PetscBool issbaij,isbaij; 753 /* manage repeated solves */ 754 MatReuse reuse; 755 MatStructure matstruct; 756 PetscErrorCode ierr; 757 758 PetscFunctionBegin; 759 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 760 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 761 } 762 /* get mat flags */ 763 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 764 reuse = MAT_INITIAL_MATRIX; 765 if (pc->setupcalled) { 766 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 767 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 768 if (matstruct == SAME_NONZERO_PATTERN) { 769 reuse = MAT_REUSE_MATRIX; 770 } else { 771 reuse = MAT_INITIAL_MATRIX; 772 } 773 } 774 if (reuse == MAT_INITIAL_MATRIX) { 775 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 776 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 777 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 778 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 780 } 781 782 /* transform local matrices if needed */ 783 if (pcbddc->use_change_of_basis) { 784 Mat change_mat_all; 785 PetscScalar *row_cmat_values; 786 PetscInt *row_cmat_indices; 787 PetscInt *nnz,*is_indices,*temp_indices; 788 PetscInt i,j,k,n_D,n_B; 789 790 /* Get Non-overlapping dimensions */ 791 n_B = pcis->n_B; 792 n_D = pcis->n-n_B; 793 794 /* compute nonzero structure of change of basis on all local nodes */ 795 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 796 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 797 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 798 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 799 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 800 k=1; 801 for (i=0;i<n_B;i++) { 802 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 803 nnz[is_indices[i]]=j; 804 if (k < j) k = j; 805 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 806 } 807 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 808 /* assemble change of basis matrix on the whole set of local dofs */ 809 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 810 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 811 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 812 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 813 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 814 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 815 for (i=0;i<n_D;i++) { 816 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 817 } 818 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 819 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 820 for (i=0;i<n_B;i++) { 821 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 822 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 823 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 824 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 825 } 826 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 827 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 828 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 829 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 830 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 831 if (!issbaij && !isbaij) { 832 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 833 } else { 834 Mat work_mat; 835 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 836 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 837 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 838 } 839 /* 840 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 841 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 842 */ 843 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 844 ierr = PetscFree(nnz);CHKERRQ(ierr); 845 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 846 } else { 847 /* without change of basis, the local matrix is unchanged */ 848 if (!pcbddc->local_mat) { 849 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 850 pcbddc->local_mat = matis->A; 851 } 852 } 853 854 /* get submatrices */ 855 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 856 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 857 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 858 if (!issbaij) { 859 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 860 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 861 } else { 862 Mat newmat; 863 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 864 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 865 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 866 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 867 } 868 PetscFunctionReturn(0); 869 } 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 873 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 874 { 875 PC_IS* pcis = (PC_IS*)(pc->data); 876 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 877 IS is_aux1,is_aux2; 878 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 879 PetscInt n_vertices,i,j,n_R,n_D,n_B; 880 PetscInt vbs,bs; 881 PetscBT bitmask; 882 PetscErrorCode ierr; 883 884 PetscFunctionBegin; 885 /* No need to setup local scatters if primal space is unchanged */ 886 if (!pcbddc->new_primal_space_local) { 887 PetscFunctionReturn(0); 888 } 889 /* destroy old objects */ 890 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 891 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 892 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 893 /* Set Non-overlapping dimensions */ 894 n_B = pcis->n_B; n_D = pcis->n - n_B; 895 /* get vertex indices from constraint matrix */ 896 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 897 898 /* create auxiliary bitmask */ 899 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 900 for (i=0;i<n_vertices;i++) { 901 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 902 } 903 904 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 905 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 906 for (i=0, n_R=0; i<pcis->n; i++) { 907 if (!PetscBTLookup(bitmask,i)) { 908 idx_R_local[n_R] = i; 909 n_R++; 910 } 911 } 912 913 /* Block code */ 914 vbs = 1; 915 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 916 if (bs>1 && !(n_vertices%bs)) { 917 PetscBool is_blocked = PETSC_TRUE; 918 PetscInt *vary; 919 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 920 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 921 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 922 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 923 for (i=0; i<n_vertices; i++) { 924 if (vary[i]!=0 && vary[i]!=bs) { 925 is_blocked = PETSC_FALSE; 926 break; 927 } 928 } 929 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 930 vbs = bs; 931 for (i=0;i<n_R/vbs;i++) { 932 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 933 } 934 } 935 ierr = PetscFree(vary);CHKERRQ(ierr); 936 } 937 ierr = PetscFree(vertices);CHKERRQ(ierr); 938 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 939 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 940 941 /* print some info if requested */ 942 if (pcbddc->dbg_flag) { 943 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 948 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 949 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 950 } 951 952 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 953 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 954 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 955 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 956 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 957 for (i=0; i<n_D; i++) { 958 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 959 } 960 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 961 for (i=0, j=0; i<n_R; i++) { 962 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 963 aux_array1[j++] = i; 964 } 965 } 966 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 967 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 968 for (i=0, j=0; i<n_B; i++) { 969 if (!PetscBTLookup(bitmask,is_indices[i])) { 970 aux_array2[j++] = i; 971 } 972 } 973 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 974 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 975 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 976 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 977 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 978 979 if (pcbddc->switch_static || pcbddc->dbg_flag) { 980 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 981 for (i=0, j=0; i<n_R; i++) { 982 if (PetscBTLookup(bitmask,idx_R_local[i])) { 983 aux_array1[j++] = i; 984 } 985 } 986 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 987 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 988 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 989 } 990 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 991 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 992 PetscFunctionReturn(0); 993 } 994 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 998 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 999 { 1000 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1001 PC_IS *pcis = (PC_IS*)pc->data; 1002 PC pc_temp; 1003 Mat A_RR; 1004 Vec vec1,vec2,vec3; 1005 MatStructure matstruct; 1006 MatReuse reuse; 1007 PetscScalar m_one = -1.0; 1008 PetscReal value; 1009 PetscInt n_D,n_R,ibs,mbs; 1010 PetscBool use_exact,use_exact_reduced,issbaij; 1011 PetscErrorCode ierr; 1012 /* prefixes stuff */ 1013 char dir_prefix[256],neu_prefix[256],str_level[3]; 1014 size_t len; 1015 1016 PetscFunctionBegin; 1017 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1018 1019 /* compute prefixes */ 1020 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1021 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1022 if (!pcbddc->current_level) { 1023 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1024 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1025 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1026 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1027 } else { 1028 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1029 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1030 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1031 len -= 15; /* remove "pc_bddc_coarse_" */ 1032 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1033 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1034 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1035 *(dir_prefix+len)='\0'; 1036 *(neu_prefix+len)='\0'; 1037 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1038 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1039 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1040 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1041 } 1042 1043 /* DIRICHLET PROBLEM */ 1044 /* Matrix for Dirichlet problem is pcis->A_II */ 1045 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1046 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1047 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1048 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1049 /* default */ 1050 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1051 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1052 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1053 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1054 if (issbaij) { 1055 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1056 } else { 1057 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1058 } 1059 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1060 } 1061 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1062 /* Allow user's customization */ 1063 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1064 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1065 if (!n_D) { 1066 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1067 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1068 } 1069 /* Set Up KSP for Dirichlet problem of BDDC */ 1070 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1071 /* set ksp_D into pcis data */ 1072 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1073 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1074 pcis->ksp_D = pcbddc->ksp_D; 1075 1076 /* NEUMANN PROBLEM */ 1077 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1078 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1079 if (pcbddc->ksp_R) { /* already created ksp */ 1080 PetscInt nn_R; 1081 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1082 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1083 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1084 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1085 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1086 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1087 reuse = MAT_INITIAL_MATRIX; 1088 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1089 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1090 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1091 reuse = MAT_INITIAL_MATRIX; 1092 } else { /* safe to reuse the matrix */ 1093 reuse = MAT_REUSE_MATRIX; 1094 } 1095 } 1096 /* last check */ 1097 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1098 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1099 reuse = MAT_INITIAL_MATRIX; 1100 } 1101 } else { /* first time, so we need to create the matrix */ 1102 reuse = MAT_INITIAL_MATRIX; 1103 } 1104 /* extract A_RR */ 1105 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1106 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1107 if (ibs != mbs) { 1108 Mat newmat; 1109 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1110 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1111 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1112 } else { 1113 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1114 } 1115 if (!pcbddc->ksp_R) { /* create object if not present */ 1116 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1117 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1118 /* default */ 1119 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1120 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1121 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1122 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1123 if (issbaij) { 1124 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1125 } else { 1126 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1127 } 1128 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1129 } 1130 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1131 /* Allow user's customization */ 1132 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1133 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1134 if (!n_R) { 1135 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1136 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1137 } 1138 /* Set Up KSP for Neumann problem of BDDC */ 1139 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1140 1141 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1142 1143 /* Dirichlet */ 1144 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1145 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1146 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1147 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1148 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1149 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1150 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1151 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1152 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1153 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1154 /* need to be adapted? */ 1155 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1156 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1157 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1158 /* print info */ 1159 if (pcbddc->dbg_flag) { 1160 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1162 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1163 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1164 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1165 } 1166 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1167 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1168 } 1169 1170 /* Neumann */ 1171 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1172 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1173 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1174 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1175 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1176 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1177 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1178 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1179 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1180 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1181 /* need to be adapted? */ 1182 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1183 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1184 /* print info */ 1185 if (pcbddc->dbg_flag) { 1186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1187 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1188 } 1189 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1190 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1191 } 1192 /* free Neumann problem's matrix */ 1193 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1199 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1200 { 1201 PetscErrorCode ierr; 1202 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1203 1204 PetscFunctionBegin; 1205 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1206 if (pcbddc->local_auxmat1) { 1207 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1208 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1209 } 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1215 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1216 { 1217 PetscErrorCode ierr; 1218 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1219 PC_IS* pcis = (PC_IS*) (pc->data); 1220 const PetscScalar zero = 0.0; 1221 1222 PetscFunctionBegin; 1223 /* Application of PHI^T (or PSI^T) */ 1224 if (pcbddc->coarse_psi_B) { 1225 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1226 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1227 } else { 1228 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1229 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1230 } 1231 /* Scatter data of coarse_rhs */ 1232 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1233 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1234 1235 /* Local solution on R nodes */ 1236 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1237 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1238 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1239 if (pcbddc->switch_static) { 1240 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1241 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1242 } 1243 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1244 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1245 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1247 if (pcbddc->switch_static) { 1248 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1249 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1250 } 1251 1252 /* Coarse solution */ 1253 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1254 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1255 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1256 } 1257 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1259 1260 /* Sum contributions from two levels */ 1261 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1262 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1268 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1269 { 1270 PetscErrorCode ierr; 1271 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1272 1273 PetscFunctionBegin; 1274 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1280 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1281 { 1282 PetscErrorCode ierr; 1283 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1284 1285 PetscFunctionBegin; 1286 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 /* uncomment for testing purposes */ 1291 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1294 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1295 { 1296 PetscErrorCode ierr; 1297 PC_IS* pcis = (PC_IS*)(pc->data); 1298 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1299 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1300 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1301 MatType impMatType=MATSEQAIJ; 1302 /* one and zero */ 1303 PetscScalar one=1.0,zero=0.0; 1304 /* space to store constraints and their local indices */ 1305 PetscScalar *temp_quadrature_constraint; 1306 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1307 /* iterators */ 1308 PetscInt i,j,k,total_counts,temp_start_ptr; 1309 /* stuff to store connected components stored in pcbddc->mat_graph */ 1310 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1311 PetscInt n_ISForFaces,n_ISForEdges; 1312 /* near null space stuff */ 1313 MatNullSpace nearnullsp; 1314 const Vec *nearnullvecs; 1315 Vec *localnearnullsp; 1316 PetscBool nnsp_has_cnst; 1317 PetscInt nnsp_size; 1318 PetscScalar *array; 1319 /* BLAS integers */ 1320 PetscBLASInt lwork,lierr; 1321 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1322 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1323 /* LAPACK working arrays for SVD or POD */ 1324 PetscBool skip_lapack; 1325 PetscScalar *work; 1326 PetscReal *singular_vals; 1327 #if defined(PETSC_USE_COMPLEX) 1328 PetscReal *rwork; 1329 #endif 1330 #if defined(PETSC_MISSING_LAPACK_GESVD) 1331 PetscBLASInt Blas_one_2=1; 1332 PetscScalar *temp_basis,*correlation_mat; 1333 #else 1334 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1335 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1336 #endif 1337 /* reuse */ 1338 PetscInt olocal_primal_size; 1339 PetscInt *oprimal_indices_local_idxs; 1340 /* change of basis */ 1341 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1342 PetscBool boolforchange,qr_needed; 1343 PetscBT touched,change_basis,qr_needed_idx; 1344 /* auxiliary stuff */ 1345 PetscInt *nnz,*is_indices,*local_to_B; 1346 /* some quantities */ 1347 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1348 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1349 1350 1351 PetscFunctionBegin; 1352 /* Destroy Mat objects computed previously */ 1353 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1354 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1355 /* Get index sets for faces, edges and vertices from graph */ 1356 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1357 pcbddc->use_vertices = PETSC_TRUE; 1358 } 1359 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1360 /* HACK: provide functions to set change of basis */ 1361 if (!ISForVertices && pcbddc->NullSpace) { 1362 pcbddc->use_change_of_basis = PETSC_TRUE; 1363 pcbddc->use_change_on_faces = PETSC_FALSE; 1364 } 1365 /* print some info */ 1366 if (pcbddc->dbg_flag) { 1367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1368 i = 0; 1369 if (ISForVertices) { 1370 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1371 } 1372 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1373 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1374 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1375 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1376 } 1377 /* check if near null space is attached to global mat */ 1378 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1379 if (nearnullsp) { 1380 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1381 /* remove any stored info */ 1382 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1383 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1384 /* store information for BDDC solver reuse */ 1385 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1386 pcbddc->onearnullspace = nearnullsp; 1387 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1388 for (i=0;i<nnsp_size;i++) { 1389 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1390 } 1391 } else { /* if near null space is not provided BDDC uses constants by default */ 1392 nnsp_size = 0; 1393 nnsp_has_cnst = PETSC_TRUE; 1394 } 1395 /* get max number of constraints on a single cc */ 1396 max_constraints = nnsp_size; 1397 if (nnsp_has_cnst) max_constraints++; 1398 1399 /* 1400 Evaluate maximum storage size needed by the procedure 1401 - temp_indices will contain start index of each constraint stored as follows 1402 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1403 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1404 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1405 */ 1406 total_counts = n_ISForFaces+n_ISForEdges; 1407 total_counts *= max_constraints; 1408 n_vertices = 0; 1409 if (ISForVertices) { 1410 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1411 } 1412 total_counts += n_vertices; 1413 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1414 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1415 total_counts = 0; 1416 max_size_of_constraint = 0; 1417 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1418 if (i<n_ISForEdges) { 1419 used_IS = &ISForEdges[i]; 1420 } else { 1421 used_IS = &ISForFaces[i-n_ISForEdges]; 1422 } 1423 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1424 total_counts += j; 1425 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1426 } 1427 total_counts *= max_constraints; 1428 total_counts += n_vertices; 1429 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1430 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1431 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1432 /* local to boundary numbering */ 1433 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1434 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1435 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1436 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1437 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1438 /* get local part of global near null space vectors */ 1439 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1440 for (k=0;k<nnsp_size;k++) { 1441 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1442 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1443 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1444 } 1445 1446 /* whether or not to skip lapack calls */ 1447 skip_lapack = PETSC_TRUE; 1448 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1449 1450 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1451 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1452 PetscScalar temp_work; 1453 #if defined(PETSC_MISSING_LAPACK_GESVD) 1454 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1455 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1456 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1457 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1458 #if defined(PETSC_USE_COMPLEX) 1459 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1460 #endif 1461 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1462 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1463 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1464 lwork = -1; 1465 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1466 #if !defined(PETSC_USE_COMPLEX) 1467 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1468 #else 1469 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1470 #endif 1471 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1472 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1473 #else /* on missing GESVD */ 1474 /* SVD */ 1475 PetscInt max_n,min_n; 1476 max_n = max_size_of_constraint; 1477 min_n = max_constraints; 1478 if (max_size_of_constraint < max_constraints) { 1479 min_n = max_size_of_constraint; 1480 max_n = max_constraints; 1481 } 1482 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1483 #if defined(PETSC_USE_COMPLEX) 1484 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1485 #endif 1486 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1487 lwork = -1; 1488 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1489 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1490 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1491 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1492 #if !defined(PETSC_USE_COMPLEX) 1493 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1494 #else 1495 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1496 #endif 1497 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1498 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1499 #endif /* on missing GESVD */ 1500 /* Allocate optimal workspace */ 1501 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1502 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1503 } 1504 /* Now we can loop on constraining sets */ 1505 total_counts = 0; 1506 temp_indices[0] = 0; 1507 /* vertices */ 1508 if (ISForVertices) { 1509 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1510 if (nnsp_has_cnst) { /* consider all vertices */ 1511 for (i=0;i<n_vertices;i++) { 1512 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1513 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1514 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1515 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1516 total_counts++; 1517 } 1518 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1519 PetscBool used_vertex; 1520 for (i=0;i<n_vertices;i++) { 1521 used_vertex = PETSC_FALSE; 1522 k = 0; 1523 while (!used_vertex && k<nnsp_size) { 1524 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1525 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1526 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1527 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1528 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1529 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1530 total_counts++; 1531 used_vertex = PETSC_TRUE; 1532 } 1533 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1534 k++; 1535 } 1536 } 1537 } 1538 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1539 n_vertices = total_counts; 1540 } 1541 1542 /* edges and faces */ 1543 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1544 if (i<n_ISForEdges) { 1545 used_IS = &ISForEdges[i]; 1546 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1547 } else { 1548 used_IS = &ISForFaces[i-n_ISForEdges]; 1549 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1550 } 1551 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1552 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1553 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1554 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1555 /* change of basis should not be performed on local periodic nodes */ 1556 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1557 if (nnsp_has_cnst) { 1558 PetscScalar quad_value; 1559 temp_constraints++; 1560 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1561 for (j=0;j<size_of_constraint;j++) { 1562 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1563 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1564 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1565 } 1566 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1567 total_counts++; 1568 } 1569 for (k=0;k<nnsp_size;k++) { 1570 PetscReal real_value; 1571 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1572 for (j=0;j<size_of_constraint;j++) { 1573 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1574 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1575 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1576 } 1577 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1578 /* check if array is null on the connected component */ 1579 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1580 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1581 if (real_value > 0.0) { /* keep indices and values */ 1582 temp_constraints++; 1583 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1584 total_counts++; 1585 } 1586 } 1587 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1588 valid_constraints = temp_constraints; 1589 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1590 if (!pcbddc->use_nnsp_true && temp_constraints) { 1591 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1592 1593 #if defined(PETSC_MISSING_LAPACK_GESVD) 1594 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1595 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1596 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1597 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1598 from that computed using LAPACKgesvd 1599 -> This is due to a different computation of eigenvectors in LAPACKheev 1600 -> The quality of the POD-computed basis will be the same */ 1601 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1602 /* Store upper triangular part of correlation matrix */ 1603 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1604 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1605 for (j=0;j<temp_constraints;j++) { 1606 for (k=0;k<j+1;k++) { 1607 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1608 } 1609 } 1610 /* compute eigenvalues and eigenvectors of correlation matrix */ 1611 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1612 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1613 #if !defined(PETSC_USE_COMPLEX) 1614 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1615 #else 1616 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1617 #endif 1618 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1619 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1620 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1621 j=0; 1622 while (j < temp_constraints && singular_vals[j] < tol) j++; 1623 total_counts=total_counts-j; 1624 valid_constraints = temp_constraints-j; 1625 /* scale and copy POD basis into used quadrature memory */ 1626 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1627 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1628 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1629 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1630 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1631 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1632 if (j<temp_constraints) { 1633 PetscInt ii; 1634 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1635 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1636 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1637 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1638 for (k=0;k<temp_constraints-j;k++) { 1639 for (ii=0;ii<size_of_constraint;ii++) { 1640 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1641 } 1642 } 1643 } 1644 #else /* on missing GESVD */ 1645 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1646 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1647 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1648 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1649 #if !defined(PETSC_USE_COMPLEX) 1650 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1651 #else 1652 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1653 #endif 1654 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1655 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1656 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1657 k = temp_constraints; 1658 if (k > size_of_constraint) k = size_of_constraint; 1659 j = 0; 1660 while (j < k && singular_vals[k-j-1] < tol) j++; 1661 total_counts = total_counts-temp_constraints+k-j; 1662 valid_constraints = k-j; 1663 #endif /* on missing GESVD */ 1664 } 1665 /* setting change_of_basis flag is safe now */ 1666 if (boolforchange) { 1667 for (j=0;j<valid_constraints;j++) { 1668 PetscBTSet(change_basis,total_counts-j-1); 1669 } 1670 } 1671 } 1672 /* free index sets of faces, edges and vertices */ 1673 for (i=0;i<n_ISForFaces;i++) { 1674 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1675 } 1676 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1677 for (i=0;i<n_ISForEdges;i++) { 1678 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1679 } 1680 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1681 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1682 1683 /* free workspace */ 1684 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1685 ierr = PetscFree(work);CHKERRQ(ierr); 1686 #if defined(PETSC_USE_COMPLEX) 1687 ierr = PetscFree(rwork);CHKERRQ(ierr); 1688 #endif 1689 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1690 #if defined(PETSC_MISSING_LAPACK_GESVD) 1691 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1692 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1693 #endif 1694 } 1695 for (k=0;k<nnsp_size;k++) { 1696 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1697 } 1698 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1699 1700 /* set quantities in pcbddc data structure and store previous primal size */ 1701 /* n_vertices defines the number of subdomain corners in the primal space */ 1702 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1703 olocal_primal_size = pcbddc->local_primal_size; 1704 pcbddc->local_primal_size = total_counts; 1705 pcbddc->n_vertices = n_vertices; 1706 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1707 1708 /* Create constraint matrix */ 1709 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1710 /* so we need to set it up properly either with or without change of basis */ 1711 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1712 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1713 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1714 /* array to compute a local numbering of constraints : vertices first then constraints */ 1715 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1716 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1717 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1718 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1719 /* auxiliary stuff for basis change */ 1720 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1721 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1722 1723 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1724 total_primal_vertices=0; 1725 for (i=0;i<pcbddc->local_primal_size;i++) { 1726 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1727 if (size_of_constraint == 1) { 1728 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1729 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1730 aux_primal_minloc[total_primal_vertices]=0; 1731 total_primal_vertices++; 1732 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1733 PetscInt min_loc,min_index; 1734 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1735 /* find first untouched local node */ 1736 k = 0; 1737 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1738 min_index = global_indices[k]; 1739 min_loc = k; 1740 /* search the minimum among global nodes already untouched on the cc */ 1741 for (k=1;k<size_of_constraint;k++) { 1742 /* there can be more than one constraint on a single connected component */ 1743 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1744 min_index = global_indices[k]; 1745 min_loc = k; 1746 } 1747 } 1748 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1749 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1750 aux_primal_minloc[total_primal_vertices]=min_loc; 1751 total_primal_vertices++; 1752 } 1753 } 1754 /* determine if a QR strategy is needed for change of basis */ 1755 qr_needed = PETSC_FALSE; 1756 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1757 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1758 if (PetscBTLookup(change_basis,i)) { 1759 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1760 j = 0; 1761 for (k=0;k<size_of_constraint;k++) { 1762 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1763 j++; 1764 } 1765 } 1766 /* found more than one primal dof on the cc */ 1767 if (j > 1) { 1768 PetscBTSet(qr_needed_idx,i); 1769 qr_needed = PETSC_TRUE; 1770 } 1771 } 1772 } 1773 /* free workspace */ 1774 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1775 1776 /* permute indices in order to have a sorted set of vertices */ 1777 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1778 1779 /* nonzero structure of constraint matrix */ 1780 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1781 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1782 j=total_primal_vertices; 1783 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1784 if (!PetscBTLookup(change_basis,i)) { 1785 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1786 j++; 1787 } 1788 } 1789 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1790 ierr = PetscFree(nnz);CHKERRQ(ierr); 1791 /* set values in constraint matrix */ 1792 for (i=0;i<total_primal_vertices;i++) { 1793 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1794 } 1795 total_counts = total_primal_vertices; 1796 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1797 if (!PetscBTLookup(change_basis,i)) { 1798 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1799 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1800 total_counts++; 1801 } 1802 } 1803 /* assembling */ 1804 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1805 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1806 /* 1807 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1808 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1809 */ 1810 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1811 if (pcbddc->use_change_of_basis) { 1812 /* dual and primal dofs on a single cc */ 1813 PetscInt dual_dofs,primal_dofs; 1814 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1815 PetscInt primal_counter; 1816 /* working stuff for GEQRF */ 1817 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1818 PetscBLASInt lqr_work; 1819 /* working stuff for UNGQR */ 1820 PetscScalar *gqr_work,lgqr_work_t; 1821 PetscBLASInt lgqr_work; 1822 /* working stuff for TRTRS */ 1823 PetscScalar *trs_rhs; 1824 PetscBLASInt Blas_NRHS; 1825 /* pointers for values insertion into change of basis matrix */ 1826 PetscInt *start_rows,*start_cols; 1827 PetscScalar *start_vals; 1828 /* working stuff for values insertion */ 1829 PetscBT is_primal; 1830 1831 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1832 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1833 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1834 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1835 /* work arrays */ 1836 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1837 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1838 /* nonzeros per row */ 1839 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1840 if (PetscBTLookup(change_basis,i)) { 1841 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1842 if (PetscBTLookup(qr_needed_idx,i)) { 1843 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1844 } else { 1845 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1846 /* get local primal index on the cc */ 1847 j = 0; 1848 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1849 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1850 } 1851 } 1852 } 1853 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1854 ierr = PetscFree(nnz);CHKERRQ(ierr); 1855 /* Set initial identity in the matrix */ 1856 for (i=0;i<pcis->n_B;i++) { 1857 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1858 } 1859 1860 if (pcbddc->dbg_flag) { 1861 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1862 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1863 } 1864 1865 1866 /* Now we loop on the constraints which need a change of basis */ 1867 /* 1868 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1869 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1870 1871 Basic blocks of change of basis matrix T computed 1872 1873 - Using the following block transformation if there is only a primal dof on the cc 1874 (in the example, primal dof is the last one of the edge in LOCAL ordering 1875 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1876 | 1 0 ... 0 1 | 1877 | 0 1 ... 0 1 | 1878 | ... | 1879 | 0 ... 1 1 | 1880 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1881 1882 - via QR decomposition of constraints otherwise 1883 */ 1884 if (qr_needed) { 1885 /* space to store Q */ 1886 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1887 /* first we issue queries for optimal work */ 1888 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1889 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1890 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1891 lqr_work = -1; 1892 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1893 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1894 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1895 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1896 lgqr_work = -1; 1897 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1898 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1899 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1900 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1901 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1902 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1903 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1904 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1905 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1906 /* array to store scaling factors for reflectors */ 1907 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1908 /* array to store rhs and solution of triangular solver */ 1909 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1910 /* allocating workspace for check */ 1911 if (pcbddc->dbg_flag) { 1912 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1913 } 1914 } 1915 /* array to store whether a node is primal or not */ 1916 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1917 for (i=0;i<total_primal_vertices;i++) { 1918 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1919 } 1920 1921 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1922 /* -> using implicit ordering contained in temp_indices data */ 1923 total_counts = pcbddc->n_vertices; 1924 primal_counter = total_counts; 1925 while (total_counts<pcbddc->local_primal_size) { 1926 primal_dofs = 1; 1927 if (PetscBTLookup(change_basis,total_counts)) { 1928 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1929 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1930 primal_dofs++; 1931 } 1932 /* get constraint info */ 1933 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1934 dual_dofs = size_of_constraint-primal_dofs; 1935 1936 if (pcbddc->dbg_flag) { 1937 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1938 } 1939 1940 if (primal_dofs > 1) { /* QR */ 1941 1942 /* copy quadrature constraints for change of basis check */ 1943 if (pcbddc->dbg_flag) { 1944 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1945 } 1946 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1947 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1948 1949 /* compute QR decomposition of constraints */ 1950 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1951 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1952 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1953 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1954 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1955 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1956 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1957 1958 /* explictly compute R^-T */ 1959 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1960 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1961 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1962 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1963 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1964 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1965 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1966 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1967 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1968 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1969 1970 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1971 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1972 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1973 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1975 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1976 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1977 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1978 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1979 1980 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1981 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1982 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1983 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1984 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1985 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1986 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1987 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1988 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1989 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1990 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1991 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1992 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1993 1994 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1995 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1996 /* insert cols for primal dofs */ 1997 for (j=0;j<primal_dofs;j++) { 1998 start_vals = &qr_basis[j*size_of_constraint]; 1999 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2000 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2001 } 2002 /* insert cols for dual dofs */ 2003 for (j=0,k=0;j<dual_dofs;k++) { 2004 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2005 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2006 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2007 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2008 j++; 2009 } 2010 } 2011 2012 /* check change of basis */ 2013 if (pcbddc->dbg_flag) { 2014 PetscInt ii,jj; 2015 PetscBool valid_qr=PETSC_TRUE; 2016 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2017 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2018 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2020 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2021 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2022 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2023 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2024 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2025 for (jj=0;jj<size_of_constraint;jj++) { 2026 for (ii=0;ii<primal_dofs;ii++) { 2027 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2028 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2029 } 2030 } 2031 if (!valid_qr) { 2032 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2033 for (jj=0;jj<size_of_constraint;jj++) { 2034 for (ii=0;ii<primal_dofs;ii++) { 2035 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2036 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2037 } 2038 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2039 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2040 } 2041 } 2042 } 2043 } else { 2044 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2045 } 2046 } 2047 } else { /* simple transformation block */ 2048 PetscInt row,col; 2049 PetscScalar val; 2050 for (j=0;j<size_of_constraint;j++) { 2051 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2052 if (!PetscBTLookup(is_primal,row)) { 2053 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2054 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2055 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2056 } else { 2057 for (k=0;k<size_of_constraint;k++) { 2058 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2059 if (row != col) { 2060 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2061 } else { 2062 val = 1.0; 2063 } 2064 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2065 } 2066 } 2067 } 2068 } 2069 /* increment primal counter */ 2070 primal_counter += primal_dofs; 2071 } else { 2072 if (pcbddc->dbg_flag) { 2073 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2074 } 2075 } 2076 /* increment constraint counter total_counts */ 2077 total_counts += primal_dofs; 2078 } 2079 2080 /* free workspace */ 2081 if (qr_needed) { 2082 if (pcbddc->dbg_flag) { 2083 ierr = PetscFree(work);CHKERRQ(ierr); 2084 } 2085 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2086 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2087 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2088 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2089 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2090 } 2091 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2092 /* assembling */ 2093 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2094 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2095 /* 2096 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2097 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2098 */ 2099 } 2100 2101 /* get indices in local ordering for vertices and constraints */ 2102 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2103 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2104 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2105 } 2106 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2107 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2108 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2109 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2110 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2111 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2112 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2113 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2114 /* check if a new primal space has been introduced */ 2115 pcbddc->new_primal_space_local = PETSC_TRUE; 2116 if (olocal_primal_size == pcbddc->local_primal_size) { 2117 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2118 pcbddc->new_primal_space_local = !pcbddc->new_primal_space_local; 2119 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2120 } 2121 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2122 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2123 2124 /* flush dbg viewer */ 2125 if (pcbddc->dbg_flag) { 2126 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2127 } 2128 2129 /* free workspace */ 2130 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2131 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2132 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2133 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2134 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2135 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2136 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2137 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2138 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2144 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2145 { 2146 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2147 PC_IS *pcis = (PC_IS*)pc->data; 2148 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2149 PetscInt bs,ierr,i,vertex_size; 2150 PetscViewer viewer=pcbddc->dbg_viewer; 2151 2152 PetscFunctionBegin; 2153 /* Reset previously computed graph */ 2154 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2155 /* Init local Graph struct */ 2156 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2157 2158 /* Check validity of the csr graph passed in by the user */ 2159 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2160 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2161 } 2162 2163 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2164 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2165 Mat mat_adj; 2166 const PetscInt *xadj,*adjncy; 2167 PetscBool flg_row=PETSC_TRUE; 2168 2169 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2170 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2171 if (!flg_row) { 2172 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2173 } 2174 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2175 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2176 if (!flg_row) { 2177 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2178 } 2179 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2180 } 2181 2182 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2183 vertex_size = 1; 2184 if (!pcbddc->user_provided_isfordofs) { 2185 if (!pcbddc->n_ISForDofs) { 2186 IS *custom_ISForDofs; 2187 2188 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2189 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2190 for (i=0;i<bs;i++) { 2191 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2192 } 2193 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2194 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2195 /* remove my references to IS objects */ 2196 for (i=0;i<bs;i++) { 2197 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2198 } 2199 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2200 } 2201 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2202 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2203 } 2204 2205 /* Setup of Graph */ 2206 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2207 2208 /* Graph's connected components analysis */ 2209 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2210 2211 /* print some info to stdout */ 2212 if (pcbddc->dbg_flag) { 2213 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2214 } 2215 2216 /* mark topography has done */ 2217 pcbddc->recompute_topography = PETSC_FALSE; 2218 PetscFunctionReturn(0); 2219 } 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2223 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2224 { 2225 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2226 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2227 PetscErrorCode ierr; 2228 2229 PetscFunctionBegin; 2230 n = 0; 2231 vertices = 0; 2232 if (pcbddc->ConstraintMatrix) { 2233 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2234 for (i=0;i<local_primal_size;i++) { 2235 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2236 if (size_of_constraint == 1) n++; 2237 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2238 } 2239 if (vertices_idx) { 2240 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2241 n = 0; 2242 for (i=0;i<local_primal_size;i++) { 2243 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2244 if (size_of_constraint == 1) { 2245 vertices[n++]=row_cmat_indices[0]; 2246 } 2247 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2248 } 2249 } 2250 } 2251 *n_vertices = n; 2252 if (vertices_idx) *vertices_idx = vertices; 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2258 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2259 { 2260 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2261 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2262 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2263 PetscBT touched; 2264 PetscErrorCode ierr; 2265 2266 /* This function assumes that the number of local constraints per connected component 2267 is not greater than the number of nodes defined for the connected component 2268 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2269 PetscFunctionBegin; 2270 n = 0; 2271 constraints_index = 0; 2272 if (pcbddc->ConstraintMatrix) { 2273 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2274 max_size_of_constraint = 0; 2275 for (i=0;i<local_primal_size;i++) { 2276 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2277 if (size_of_constraint > 1) { 2278 n++; 2279 } 2280 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2281 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2282 } 2283 if (constraints_idx) { 2284 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2285 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2286 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2287 n = 0; 2288 for (i=0;i<local_primal_size;i++) { 2289 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2290 if (size_of_constraint > 1) { 2291 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2292 /* find first untouched local node */ 2293 j = 0; 2294 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2295 min_index = row_cmat_global_indices[j]; 2296 min_loc = j; 2297 /* search the minimum among nodes not yet touched on the connected component 2298 since there can be more than one constraint on a single cc */ 2299 for (j=1;j<size_of_constraint;j++) { 2300 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2301 min_index = row_cmat_global_indices[j]; 2302 min_loc = j; 2303 } 2304 } 2305 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2306 constraints_index[n++] = row_cmat_indices[min_loc]; 2307 } 2308 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2309 } 2310 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2311 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2312 } 2313 } 2314 *n_constraints = n; 2315 if (constraints_idx) *constraints_idx = constraints_index; 2316 PetscFunctionReturn(0); 2317 } 2318 2319 /* the next two functions has been adapted from pcis.c */ 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "PCBDDCApplySchur" 2322 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2323 { 2324 PetscErrorCode ierr; 2325 PC_IS *pcis = (PC_IS*)(pc->data); 2326 2327 PetscFunctionBegin; 2328 if (!vec2_B) { vec2_B = v; } 2329 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2330 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2331 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2332 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2333 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2339 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2340 { 2341 PetscErrorCode ierr; 2342 PC_IS *pcis = (PC_IS*)(pc->data); 2343 2344 PetscFunctionBegin; 2345 if (!vec2_B) { vec2_B = v; } 2346 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2347 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2348 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2349 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2350 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2351 PetscFunctionReturn(0); 2352 } 2353 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "PCBDDCSubsetNumbering" 2356 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2357 { 2358 Vec local_vec,global_vec; 2359 IS seqis,paris; 2360 VecScatter scatter_ctx; 2361 PetscScalar *array; 2362 PetscInt *temp_global_dofs; 2363 PetscScalar globalsum; 2364 PetscInt i,j,s; 2365 PetscInt nlocals,first_index,old_index,max_local; 2366 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2367 PetscMPIInt *dof_sizes,*dof_displs; 2368 PetscBool first_found; 2369 PetscErrorCode ierr; 2370 2371 PetscFunctionBegin; 2372 /* mpi buffers */ 2373 MPI_Comm_size(comm,&size_prec_comm); 2374 MPI_Comm_rank(comm,&rank_prec_comm); 2375 j = ( !rank_prec_comm ? size_prec_comm : 0); 2376 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2377 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2378 /* get maximum size of subset */ 2379 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2380 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2381 max_local = 0; 2382 if (n_local_dofs) { 2383 max_local = temp_global_dofs[0]; 2384 for (i=1;i<n_local_dofs;i++) { 2385 if (max_local < temp_global_dofs[i] ) { 2386 max_local = temp_global_dofs[i]; 2387 } 2388 } 2389 } 2390 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2391 max_global++; 2392 max_local = 0; 2393 if (n_local_dofs) { 2394 max_local = local_dofs[0]; 2395 for (i=1;i<n_local_dofs;i++) { 2396 if (max_local < local_dofs[i] ) { 2397 max_local = local_dofs[i]; 2398 } 2399 } 2400 } 2401 max_local++; 2402 /* allocate workspace */ 2403 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2404 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2405 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2406 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2407 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2408 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2409 /* create scatter */ 2410 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2411 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2412 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2413 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2414 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2415 /* init array */ 2416 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2417 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2418 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2419 if (local_dofs_mult) { 2420 for (i=0;i<n_local_dofs;i++) { 2421 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2422 } 2423 } else { 2424 for (i=0;i<n_local_dofs;i++) { 2425 array[local_dofs[i]]=1.0; 2426 } 2427 } 2428 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2429 /* scatter into global vec and get total number of global dofs */ 2430 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2431 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2432 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2433 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2434 /* Fill global_vec with cumulative function for global numbering */ 2435 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2436 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2437 nlocals = 0; 2438 first_index = -1; 2439 first_found = PETSC_FALSE; 2440 for (i=0;i<s;i++) { 2441 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2442 first_found = PETSC_TRUE; 2443 first_index = i; 2444 } 2445 nlocals += (PetscInt)PetscRealPart(array[i]); 2446 } 2447 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2448 if (!rank_prec_comm) { 2449 dof_displs[0]=0; 2450 for (i=1;i<size_prec_comm;i++) { 2451 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2452 } 2453 } 2454 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2455 if (first_found) { 2456 array[first_index] += (PetscScalar)nlocals; 2457 old_index = first_index; 2458 for (i=first_index+1;i<s;i++) { 2459 if (PetscRealPart(array[i]) > 0.0) { 2460 array[i] += array[old_index]; 2461 old_index = i; 2462 } 2463 } 2464 } 2465 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2466 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2467 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2468 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2469 /* get global ordering of local dofs */ 2470 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2471 if (local_dofs_mult) { 2472 for (i=0;i<n_local_dofs;i++) { 2473 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2474 } 2475 } else { 2476 for (i=0;i<n_local_dofs;i++) { 2477 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2478 } 2479 } 2480 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2481 /* free workspace */ 2482 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2483 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2484 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2485 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2486 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2487 /* return pointer to global ordering of local dofs */ 2488 *global_numbering_subset = temp_global_dofs; 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2494 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2495 { 2496 PetscInt i,j; 2497 PetscScalar *alphas; 2498 PetscErrorCode ierr; 2499 2500 PetscFunctionBegin; 2501 /* this implements stabilized Gram-Schmidt */ 2502 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2503 for (i=0;i<n;i++) { 2504 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2505 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2506 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2507 } 2508 ierr = PetscFree(alphas);CHKERRQ(ierr); 2509 PetscFunctionReturn(0); 2510 } 2511 2512 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2513 #undef __FUNCT__ 2514 #define __FUNCT__ "MatConvert_IS_AIJ" 2515 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2516 { 2517 Mat new_mat; 2518 Mat_IS *matis = (Mat_IS*)(mat->data); 2519 /* info on mat */ 2520 PetscInt bs,rows,cols; 2521 PetscInt lrows,lcols; 2522 PetscInt local_rows,local_cols; 2523 PetscMPIInt nsubdomains; 2524 /* preallocation */ 2525 Vec vec_dnz,vec_onz; 2526 PetscScalar *my_dnz,*my_onz,*array; 2527 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2528 PetscInt index_row,index_col,owner; 2529 PetscInt *local_indices,*global_indices; 2530 /* work */ 2531 PetscInt i,j; 2532 PetscErrorCode ierr; 2533 2534 PetscFunctionBegin; 2535 /* CHECKS */ 2536 /* get info from mat */ 2537 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2538 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2539 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2540 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2541 2542 /* MAT_INITIAL_MATRIX */ 2543 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2544 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2545 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2546 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2547 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2548 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2549 2550 /* 2551 preallocation 2552 */ 2553 2554 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2555 /* 2556 Some vectors are needed to sum up properly on shared interface dofs. 2557 Note that preallocation is not exact, since it overestimates nonzeros 2558 */ 2559 /* 2560 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2561 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2562 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2563 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2564 */ 2565 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2566 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2567 /* All processes need to compute entire row ownership */ 2568 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2569 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2570 for (i=0;i<nsubdomains;i++) { 2571 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2572 row_ownership[j]=i; 2573 } 2574 } 2575 /* map indices of local mat to global values */ 2576 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2577 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2578 for (i=0;i<local_rows;i++) local_indices[i]=i; 2579 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2580 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2581 2582 /* 2583 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2584 then, they will be summed up properly. This way, preallocation is always sufficient 2585 */ 2586 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2587 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2588 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2589 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2590 for (i=0;i<local_rows;i++) { 2591 index_row = global_indices[i]; 2592 for (j=i;j<local_rows;j++) { 2593 owner = row_ownership[index_row]; 2594 index_col = global_indices[j]; 2595 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2596 my_dnz[i] += 1.0; 2597 } else { /* offdiag block */ 2598 my_onz[i] += 1.0; 2599 } 2600 /* same as before, interchanging rows and cols */ 2601 if (i != j) { 2602 owner = row_ownership[index_col]; 2603 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2604 my_dnz[j] += 1.0; 2605 } else { 2606 my_onz[j] += 1.0; 2607 } 2608 } 2609 } 2610 } 2611 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2612 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2613 if (local_rows) { /* multilevel guard */ 2614 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2615 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2616 } 2617 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2618 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2619 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2620 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2621 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2622 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2623 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2624 2625 /* set computed preallocation in dnz and onz */ 2626 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2627 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2628 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2629 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2630 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2631 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2632 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2633 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2634 2635 /* Resize preallocation if overestimated */ 2636 for (i=0;i<lrows;i++) { 2637 dnz[i] = PetscMin(dnz[i],lcols); 2638 onz[i] = PetscMin(onz[i],cols-lcols); 2639 } 2640 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2641 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2642 2643 /* 2644 Set values. Very Basic. 2645 */ 2646 for (i=0;i<local_rows;i++) { 2647 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2648 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2649 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2650 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2651 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2652 } 2653 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2654 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2655 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2656 2657 /* get back mat */ 2658 *M = new_mat; 2659 PetscFunctionReturn(0); 2660 } 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatISSubassemble_Private" 2664 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2665 { 2666 Mat subdomain_adj; 2667 IS new_ranks,ranks_send_to; 2668 MatPartitioning partitioner; 2669 Mat_IS *matis; 2670 PetscInt n_neighs,*neighs,*n_shared,**shared; 2671 PetscInt prank; 2672 PetscMPIInt size,rank,color; 2673 PetscInt *xadj,*adjncy,*oldranks; 2674 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2675 PetscInt i,j,n_subdomains,local_size,threshold=0; 2676 PetscErrorCode ierr; 2677 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2678 PetscSubcomm subcomm; 2679 2680 PetscFunctionBegin; 2681 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2682 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2683 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2684 2685 /* Get info on mapping */ 2686 matis = (Mat_IS*)(mat->data); 2687 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2688 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2689 2690 /* build local CSR graph of subdomains' connectivity */ 2691 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2692 xadj[0] = 0; 2693 xadj[1] = PetscMax(n_neighs-1,0); 2694 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2695 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2696 2697 if (threshold) { 2698 PetscInt* count,min_threshold; 2699 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2700 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2701 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2702 for (j=0;j<n_shared[i];j++) { 2703 count[shared[i][j]] += 1; 2704 } 2705 } 2706 /* adapt threshold since we dont want to lose significant connections */ 2707 min_threshold = n_neighs; 2708 for (i=1;i<n_neighs;i++) { 2709 for (j=0;j<n_shared[i];j++) { 2710 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2711 } 2712 } 2713 threshold = PetscMax(min_threshold+1,threshold); 2714 xadj[1] = 0; 2715 for (i=1;i<n_neighs;i++) { 2716 for (j=0;j<n_shared[i];j++) { 2717 if (count[shared[i][j]] < threshold) { 2718 adjncy[xadj[1]] = neighs[i]; 2719 adjncy_wgt[xadj[1]] = n_shared[i]; 2720 xadj[1]++; 2721 break; 2722 } 2723 } 2724 } 2725 ierr = PetscFree(count);CHKERRQ(ierr); 2726 } else { 2727 if (xadj[1]) { 2728 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2729 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2730 } 2731 } 2732 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2733 if (use_square) { 2734 for (i=0;i<xadj[1];i++) { 2735 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2736 } 2737 } 2738 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2739 2740 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2741 2742 /* 2743 Restrict work on active processes only. 2744 */ 2745 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2746 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2747 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2748 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2749 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2750 if (color) { 2751 ierr = PetscFree(xadj);CHKERRQ(ierr); 2752 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2753 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2754 } else { 2755 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2756 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2757 prank = rank; 2758 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2759 for (i=0;i<size;i++) { 2760 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2761 } 2762 for (i=0;i<xadj[1];i++) { 2763 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2764 } 2765 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2766 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2767 n_subdomains = (PetscInt)size; 2768 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2769 2770 /* Partition */ 2771 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2772 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2773 if (use_vwgt) { 2774 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2775 v_wgt[0] = local_size; 2776 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2777 } 2778 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2779 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2780 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2781 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2782 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2783 2784 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2785 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2786 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2787 /* clean up */ 2788 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2789 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2790 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2791 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2792 } 2793 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2794 2795 /* assemble parallel IS for sends */ 2796 i = 1; 2797 if (color) i=0; 2798 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2799 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2800 2801 /* get back IS */ 2802 *is_sends = ranks_send_to; 2803 PetscFunctionReturn(0); 2804 } 2805 2806 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2807 2808 #undef __FUNCT__ 2809 #define __FUNCT__ "MatISSubassemble" 2810 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2811 { 2812 Mat local_mat,new_mat; 2813 Mat_IS *matis; 2814 IS is_sends_internal; 2815 PetscInt rows,cols; 2816 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2817 PetscBool ismatis,isdense; 2818 ISLocalToGlobalMapping l2gmap; 2819 PetscInt* l2gmap_indices; 2820 const PetscInt* is_indices; 2821 MatType new_local_type; 2822 MatTypePrivate new_local_type_private; 2823 /* buffers */ 2824 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2825 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2826 /* MPI */ 2827 MPI_Comm comm; 2828 PetscMPIInt n_sends,n_recvs,commsize; 2829 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2830 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2831 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2832 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2833 PetscErrorCode ierr; 2834 2835 PetscFunctionBegin; 2836 /* checks */ 2837 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2838 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2839 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2840 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2841 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2842 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2843 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2844 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2845 PetscValidLogicalCollectiveInt(mat,bs,0); 2846 /* prepare IS for sending if not provided */ 2847 if (!is_sends) { 2848 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2849 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2850 } else { 2851 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2852 is_sends_internal = is_sends; 2853 } 2854 2855 /* get pointer of MATIS data */ 2856 matis = (Mat_IS*)mat->data; 2857 2858 /* get comm */ 2859 comm = PetscObjectComm((PetscObject)mat); 2860 2861 /* compute number of sends */ 2862 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2863 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2864 2865 /* compute number of receives */ 2866 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2867 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2868 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2869 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2870 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2871 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2872 ierr = PetscFree(iflags);CHKERRQ(ierr); 2873 2874 /* prepare send/receive buffers */ 2875 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2876 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2877 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2878 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2879 2880 /* Get data from local mat */ 2881 if (!isdense) { 2882 /* TODO: See below some guidelines on how to prepare the local buffers */ 2883 /* 2884 send_buffer_vals should contain the raw values of the local matrix 2885 send_buffer_idxs should contain: 2886 - MatType_PRIVATE type 2887 - PetscInt size_of_l2gmap 2888 - PetscInt global_row_indices[size_of_l2gmap] 2889 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2890 */ 2891 } else { 2892 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2893 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2894 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2895 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2896 send_buffer_idxs[1] = i; 2897 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2898 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2899 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2900 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2901 for (i=0;i<n_sends;i++) { 2902 ilengths_vals[is_indices[i]] = len*len; 2903 ilengths_idxs[is_indices[i]] = len+2; 2904 } 2905 } 2906 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2907 buf_size_idxs = 0; 2908 buf_size_vals = 0; 2909 for (i=0;i<n_recvs;i++) { 2910 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2911 buf_size_vals += (PetscInt)olengths_vals[i]; 2912 } 2913 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2914 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2915 2916 /* get new tags for clean communications */ 2917 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2918 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2919 2920 /* allocate for requests */ 2921 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2922 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2923 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2924 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2925 2926 /* communications */ 2927 ptr_idxs = recv_buffer_idxs; 2928 ptr_vals = recv_buffer_vals; 2929 for (i=0;i<n_recvs;i++) { 2930 source_dest = onodes[i]; 2931 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2932 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2933 ptr_idxs += olengths_idxs[i]; 2934 ptr_vals += olengths_vals[i]; 2935 } 2936 for (i=0;i<n_sends;i++) { 2937 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2938 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2939 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2940 } 2941 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2942 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2943 2944 /* assemble new l2g map */ 2945 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2946 ptr_idxs = recv_buffer_idxs; 2947 buf_size_idxs = 0; 2948 for (i=0;i<n_recvs;i++) { 2949 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2950 ptr_idxs += olengths_idxs[i]; 2951 } 2952 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2953 ptr_idxs = recv_buffer_idxs; 2954 buf_size_idxs = 0; 2955 for (i=0;i<n_recvs;i++) { 2956 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2957 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2958 ptr_idxs += olengths_idxs[i]; 2959 } 2960 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2961 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2962 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2963 2964 /* infer new local matrix type from received local matrices type */ 2965 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2966 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2967 new_local_type_private = MATAIJ_PRIVATE; 2968 new_local_type = MATSEQAIJ; 2969 if (n_recvs) { 2970 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2971 ptr_idxs = recv_buffer_idxs; 2972 for (i=0;i<n_recvs;i++) { 2973 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2974 new_local_type_private = MATAIJ_PRIVATE; 2975 break; 2976 } 2977 ptr_idxs += olengths_idxs[i]; 2978 } 2979 switch (new_local_type_private) { 2980 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2981 new_local_type = MATSEQAIJ; 2982 bs = 1; 2983 break; 2984 case MATAIJ_PRIVATE: 2985 new_local_type = MATSEQAIJ; 2986 bs = 1; 2987 break; 2988 case MATBAIJ_PRIVATE: 2989 new_local_type = MATSEQBAIJ; 2990 break; 2991 case MATSBAIJ_PRIVATE: 2992 new_local_type = MATSEQSBAIJ; 2993 break; 2994 default: 2995 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2996 break; 2997 } 2998 } 2999 3000 /* create MATIS object */ 3001 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3002 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 3003 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3004 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 3005 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3006 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3007 3008 /* set values */ 3009 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3010 ptr_vals = recv_buffer_vals; 3011 ptr_idxs = recv_buffer_idxs; 3012 for (i=0;i<n_recvs;i++) { 3013 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3014 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3015 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3016 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3017 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3018 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3019 } 3020 ptr_idxs += olengths_idxs[i]; 3021 ptr_vals += olengths_vals[i]; 3022 } 3023 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3024 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3025 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3026 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3027 3028 { /* check */ 3029 Vec lvec,rvec; 3030 PetscReal infty_error; 3031 3032 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3033 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3034 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3035 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3036 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 3037 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3038 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3039 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3040 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3041 } 3042 3043 /* free workspace */ 3044 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3045 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3046 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3047 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3048 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3049 if (isdense) { 3050 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3051 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3052 } else { 3053 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3054 } 3055 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3056 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3057 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3058 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3059 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3060 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3061 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3062 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3063 ierr = PetscFree(onodes);CHKERRQ(ierr); 3064 /* get back new mat */ 3065 *mat_n = new_mat; 3066 PetscFunctionReturn(0); 3067 } 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3071 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3072 { 3073 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3074 PC_IS *pcis = (PC_IS*)pc->data; 3075 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3076 MatNullSpace CoarseNullSpace=NULL; 3077 ISLocalToGlobalMapping coarse_islg; 3078 IS coarse_is; 3079 PetscInt max_it; 3080 PetscInt im_active=-1,active_procs=-1; 3081 PC pc_temp; 3082 PCType coarse_pc_type; 3083 KSPType coarse_ksp_type; 3084 PetscBool multilevel_requested,multilevel_allowed; 3085 PetscBool setsym,issym,isbddc,isnn; 3086 MatStructure matstruct; 3087 PetscErrorCode ierr; 3088 3089 PetscFunctionBegin; 3090 /* Assign global numbering to coarse dofs */ 3091 if (pcbddc->new_primal_space) { /* this is suboptimal -> we can chech when creating local primal indices */ 3092 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3093 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3094 /* see if we can avoid some work */ 3095 if (pcbddc->coarse_ksp) { /* already present */ 3096 Mat tcoarse_mat; 3097 PetscInt ocoarse_size; 3098 ierr = KSPGetOperators(pcbddc->coarse_ksp,NULL,&tcoarse_mat,NULL);CHKERRQ(ierr); 3099 ierr = MatGetSize(tcoarse_mat,&ocoarse_size,NULL);CHKERRQ(ierr); 3100 if (ocoarse_size != pcbddc->coarse_size) { /* we need to reset the KSP */ 3101 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3102 } 3103 } 3104 } 3105 3106 /* infer some info from user */ 3107 issym = PETSC_FALSE; 3108 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3109 multilevel_allowed = PETSC_FALSE; 3110 multilevel_requested = PETSC_FALSE; 3111 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3112 if (multilevel_requested) { 3113 /* count "active processes" */ 3114 im_active = !!(pcis->n); 3115 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3116 if (active_procs/pcbddc->coarsening_ratio < 2) { 3117 multilevel_allowed = PETSC_FALSE; 3118 } else { 3119 multilevel_allowed = PETSC_TRUE; 3120 } 3121 } 3122 3123 /* set defaults for coarse KSP and PC */ 3124 if (multilevel_allowed) { 3125 if (issym) { 3126 coarse_ksp_type = KSPRICHARDSON; 3127 } else { 3128 coarse_ksp_type = KSPCHEBYSHEV; 3129 } 3130 coarse_pc_type = PCBDDC; 3131 } else { 3132 coarse_ksp_type = KSPPREONLY; 3133 coarse_pc_type = PCREDUNDANT; 3134 } 3135 3136 /* create the coarse KSP object only once with defaults */ 3137 if (!pcbddc->coarse_ksp) { 3138 char prefix[256],str_level[3]; 3139 size_t len; 3140 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3141 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3142 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3143 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3144 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3145 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3146 /* prefix */ 3147 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3148 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3149 if (!pcbddc->current_level) { 3150 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3151 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3152 } else { 3153 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3154 if (pcbddc->current_level>1) len -= 2; 3155 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3156 *(prefix+len)='\0'; 3157 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3158 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3159 } 3160 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3161 } 3162 /* allow user customization */ 3163 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3164 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3165 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3166 3167 /* get some info after set from options */ 3168 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3169 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3170 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3171 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3172 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3173 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3174 isbddc = PETSC_FALSE; 3175 } 3176 3177 /* propagate BDDC info to the next level */ 3178 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3179 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3180 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3181 3182 /* creates temporary MATIS object for coarse matrix */ 3183 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3184 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3185 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3186 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3187 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3188 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3189 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3190 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3191 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3192 3193 /* assemble coarse matrix */ 3194 if (isbddc || isnn) { 3195 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3196 } else { /* need to provide reuse */ 3197 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3198 } 3199 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3200 3201 /* create local to global scatters for coarse problem */ 3202 if (pcbddc->new_primal_space) { 3203 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3204 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3205 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3206 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3207 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3208 } 3209 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3210 3211 /* propagate symmetry info to coarse matrix */ 3212 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3213 3214 /* Compute coarse null space (special handling by BDDC only) */ 3215 if (pcbddc->NullSpace) { 3216 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3217 if (isbddc) { 3218 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3219 } else { 3220 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3221 } 3222 } 3223 3224 /* set operators */ 3225 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3226 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3227 3228 /* additional KSP customization */ 3229 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3230 if (max_it < 5) { 3231 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3232 } 3233 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3234 3235 3236 /* print some info if requested */ 3237 if (pcbddc->dbg_flag) { 3238 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3239 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3240 if (!multilevel_allowed) { 3241 if (multilevel_requested) { 3242 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3243 } else if (pcbddc->max_levels) { 3244 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3245 } 3246 } 3247 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3248 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3249 } 3250 3251 /* setup coarse ksp */ 3252 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3253 if (pcbddc->dbg_flag) { 3254 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3255 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3256 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3257 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3258 } 3259 3260 /* Check coarse problem if requested */ 3261 if (pcbddc->dbg_flag) { 3262 KSP check_ksp; 3263 KSPType check_ksp_type; 3264 PC check_pc; 3265 Vec check_vec; 3266 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3267 PetscInt its; 3268 PetscBool ispreonly,compute; 3269 3270 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3271 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3272 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3273 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3274 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3275 if (ispreonly) { 3276 check_ksp_type = KSPPREONLY; 3277 compute = PETSC_FALSE; 3278 } else { 3279 if (issym) check_ksp_type = KSPCG; 3280 else check_ksp_type = KSPGMRES; 3281 compute = PETSC_TRUE; 3282 } 3283 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3284 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3285 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3286 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3287 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3288 /* create random vec */ 3289 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3290 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3291 if (CoarseNullSpace) { 3292 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3293 } 3294 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3295 /* solve coarse problem */ 3296 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3297 if (CoarseNullSpace) { 3298 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3299 } 3300 /* check coarse problem residual error */ 3301 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3302 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3303 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3304 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3305 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3307 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3308 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3309 /* get eigenvalue estimation if preonly has not been requested */ 3310 if (!ispreonly) { 3311 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3312 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3313 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3314 } 3315 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3316 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3317 } 3318 /* free memory */ 3319 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3320 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3321 PetscFunctionReturn(0); 3322 } 3323 3324 #undef __FUNCT__ 3325 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3326 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3327 { 3328 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3329 PC_IS* pcis = (PC_IS*)pc->data; 3330 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3331 PetscInt i,coarse_size; 3332 PetscInt *local_primal_indices; 3333 PetscErrorCode ierr; 3334 3335 PetscFunctionBegin; 3336 /* Compute global number of coarse dofs */ 3337 if (!pcbddc->primal_indices_local_idxs) { 3338 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 3339 } 3340 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3341 3342 /* check numbering */ 3343 if (pcbddc->dbg_flag) { 3344 PetscScalar coarsesum,*array; 3345 3346 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3347 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3348 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3349 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3350 for (i=0;i<pcbddc->local_primal_size;i++) { 3351 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3352 } 3353 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3354 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3355 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3356 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3357 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3358 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3359 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3360 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3361 for (i=0;i<pcis->n;i++) { 3362 if (array[i] == 1.0) { 3363 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3364 } 3365 } 3366 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3367 for (i=0;i<pcis->n;i++) { 3368 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3369 } 3370 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3371 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3372 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3373 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3374 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3375 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3376 if (pcbddc->dbg_flag > 1) { 3377 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3378 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3379 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3380 for (i=0;i<pcbddc->local_primal_size;i++) { 3381 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3382 } 3383 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3384 } 3385 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3386 } 3387 /* get back data */ 3388 *coarse_size_n = coarse_size; 3389 *local_primal_indices_n = local_primal_indices; 3390 PetscFunctionReturn(0); 3391 } 3392 3393