1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCSetUpSolvers" 10 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscScalar *coarse_submat_vals; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 /* Compute matrix after change of basis and extract local submatrices */ 18 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D */ 21 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 22 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 23 24 /* Setup local solvers ksp_D and ksp_R */ 25 /* PCBDDCSetUpLocalScatters should be called first! */ 26 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 27 28 /* Change global null space passed in by the user if change of basis has been requested */ 29 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 30 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 31 } 32 33 /* 34 Setup local correction and local part of coarse basis. 35 Gives back the dense local part of the coarse matrix in column major ordering 36 */ 37 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 38 39 /* Compute total number of coarse nodes and setup coarse solver */ 40 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 41 42 /* free */ 43 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 #undef __FUNCT__ 48 #define __FUNCT__ "PCBDDCResetCustomization" 49 PetscErrorCode PCBDDCResetCustomization(PC pc) 50 { 51 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 61 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 62 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 63 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 64 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 65 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCResetTopography" 71 PetscErrorCode PCBDDCResetTopography(PC pc) 72 { 73 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 78 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 79 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 80 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 81 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "PCBDDCResetSolvers" 87 PetscErrorCode PCBDDCResetSolvers(PC pc) 88 { 89 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 101 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 102 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 104 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 105 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 108 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 109 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 110 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 111 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 112 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 113 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 114 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 115 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 116 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 #undef __FUNCT__ 121 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 122 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 123 { 124 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 125 PC_IS *pcis = (PC_IS*)pc->data; 126 VecType impVecType; 127 PetscInt n_constraints,n_R,old_size; 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 if (!pcbddc->ConstraintMatrix) { 132 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 133 } 134 /* get sizes */ 135 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 136 n_R = pcis->n-pcbddc->n_actual_vertices; 137 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 138 /* local work vectors (try to avoid unneeded work)*/ 139 /* R nodes */ 140 old_size = -1; 141 if (pcbddc->vec1_R) { 142 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 143 } 144 if (n_R != old_size) { 145 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 146 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 147 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 148 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 149 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 150 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 151 } 152 /* local primal dofs */ 153 old_size = -1; 154 if (pcbddc->vec1_P) { 155 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 156 } 157 if (pcbddc->local_primal_size != old_size) { 158 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 159 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 160 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 161 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 162 } 163 /* local explicit constraints */ 164 old_size = -1; 165 if (pcbddc->vec1_C) { 166 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 167 } 168 if (n_constraints && n_constraints != old_size) { 169 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 170 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 171 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 172 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 173 } 174 PetscFunctionReturn(0); 175 } 176 177 #undef __FUNCT__ 178 #define __FUNCT__ "PCBDDCSetUpCorrection" 179 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 180 { 181 PetscErrorCode ierr; 182 /* pointers to pcis and pcbddc */ 183 PC_IS* pcis = (PC_IS*)pc->data; 184 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 185 /* submatrices of local problem */ 186 Mat A_RV,A_VR,A_VV; 187 /* working matrices */ 188 Mat M1,M2,M3,C_CR; 189 /* working vectors */ 190 Vec vec1_C,vec2_C,vec1_V,vec2_V; 191 /* additional working stuff */ 192 IS is_aux; 193 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 194 const PetscScalar *array,*row_cmat_values; 195 const PetscInt *row_cmat_indices,*idx_R_local; 196 PetscInt *idx_V_B,*auxindices; 197 PetscInt n_vertices,n_constraints,size_of_constraint; 198 PetscInt i,j,n_R,n_D,n_B; 199 PetscBool unsymmetric_check; 200 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 201 MatType impMatType; 202 /* some shortcuts to scalars */ 203 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 204 /* for debugging purposes */ 205 PetscReal *coarsefunctions_errors,*constraints_errors; 206 207 PetscFunctionBegin; 208 /* get number of vertices (corners plus constraints with change of basis) 209 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 210 n_vertices = pcbddc->n_actual_vertices; 211 n_constraints = pcbddc->local_primal_size-n_vertices; 212 /* Set Non-overlapping dimensions */ 213 n_B = pcis->n_B; n_D = pcis->n - n_B; 214 n_R = pcis->n-n_vertices; 215 216 /* Set types for local objects needed by BDDC precondtioner */ 217 impMatType = MATSEQDENSE; 218 219 /* Allocating some extra storage just to be safe */ 220 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 221 for (i=0;i<pcis->n;i++) auxindices[i]=i; 222 223 /* vertices in boundary numbering */ 224 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 225 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 226 if (i != n_vertices) { 227 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 228 } 229 230 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 231 if (n_constraints) { 232 /* see if we can save some allocations */ 233 if (pcbddc->local_auxmat2) { 234 PetscInt on_R,on_constraints; 235 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 236 if (on_R != n_R || on_constraints != n_constraints) { 237 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 238 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 239 } 240 } 241 /* work vectors */ 242 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 243 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 244 /* auxiliary matrices */ 245 if (!pcbddc->local_auxmat2) { 246 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 247 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 248 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 249 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 250 } 251 252 /* Extract constraints on R nodes: C_{CR} */ 253 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 254 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 255 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 256 257 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 258 for (i=0;i<n_constraints;i++) { 259 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 260 /* Get row of constraint matrix in R numbering */ 261 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 262 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 263 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 264 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 265 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 266 /* Solve for row of constraint matrix in R numbering */ 267 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 268 /* Set values in local_auxmat2 */ 269 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 270 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 271 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 272 } 273 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 275 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 276 277 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 278 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 279 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 280 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 281 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 282 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 283 ierr = MatSetUp(M1);CHKERRQ(ierr); 284 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 285 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 286 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 287 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 288 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 289 ierr = MatDestroy(&M2);CHKERRQ(ierr); 290 ierr = MatDestroy(&M3);CHKERRQ(ierr); 291 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 292 if (!pcbddc->local_auxmat1) { 293 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 294 } else { 295 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 296 } 297 } 298 299 /* Get submatrices from subdomain matrix */ 300 if (n_vertices) { 301 PetscInt ibs,mbs; 302 PetscBool issbaij; 303 Mat newmat; 304 305 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 306 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 307 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 308 if (ibs != mbs) { /* need to convert to SEQAIJ */ 309 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 310 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 311 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 312 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 313 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 314 } else { 315 /* this is safe */ 316 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 317 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 318 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 319 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 320 /* which of the two approaches is faster? */ 321 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 322 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 323 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 324 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 325 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 326 } else { 327 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 328 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 329 } 330 } 331 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 332 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 333 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 334 } 335 336 /* Matrix of coarse basis functions (local) */ 337 if (pcbddc->coarse_phi_B) { 338 PetscInt on_B,on_primal; 339 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 340 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 341 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 342 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 343 } 344 } 345 if (pcbddc->coarse_phi_D) { 346 PetscInt on_D,on_primal; 347 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 348 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 349 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 350 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 351 } 352 } 353 if (!pcbddc->coarse_phi_B) { 354 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 355 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 356 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 357 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 358 } 359 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 360 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 361 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 362 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 363 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 364 } 365 366 if (pcbddc->dbg_flag) { 367 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 368 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 369 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 370 } 371 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 372 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 373 374 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 375 376 /* vertices */ 377 for (i=0;i<n_vertices;i++) { 378 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 379 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 380 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 381 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 382 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 383 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 384 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 385 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 387 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 388 if (n_constraints) { 389 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 390 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 391 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 392 } 393 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 394 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 395 396 /* Set values in coarse basis function and subdomain part of coarse_mat */ 397 /* coarse basis functions */ 398 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 399 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 401 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 402 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 403 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 404 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 405 if (pcbddc->switch_static || pcbddc->dbg_flag) { 406 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 408 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 409 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 411 } 412 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 413 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 414 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 416 if (n_constraints) { 417 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 418 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 420 } 421 422 /* check */ 423 if (pcbddc->dbg_flag) { 424 /* assemble subdomain vector on local nodes */ 425 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 426 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 427 if (n_R) { 428 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 429 } 430 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 431 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 433 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 434 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 435 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 436 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 437 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 438 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 439 if (n_constraints) { 440 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 441 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 442 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 443 } 444 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 445 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 446 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 447 /* check saddle point solution */ 448 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 449 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 450 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 451 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 452 /* shift by the identity matrix */ 453 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 454 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 455 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 456 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 457 } 458 } 459 460 /* constraints */ 461 for (i=0;i<n_constraints;i++) { 462 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 463 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 464 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 465 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 466 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 467 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 468 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 469 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 470 if (n_vertices) { 471 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 472 } 473 /* Set values in coarse basis function and subdomain part of coarse_mat */ 474 /* coarse basis functions */ 475 j = i+n_vertices; /* don't touch this! */ 476 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 477 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 480 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 481 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 482 if (pcbddc->switch_static || pcbddc->dbg_flag) { 483 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 484 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 485 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 486 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 488 } 489 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 490 if (n_vertices) { 491 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 492 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 493 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 494 } 495 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 496 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 497 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 498 499 if (pcbddc->dbg_flag) { 500 /* assemble subdomain vector on nodes */ 501 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 502 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 503 if (n_R) { 504 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 505 } 506 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 507 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 508 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 509 /* assemble subdomain vector of lagrange multipliers */ 510 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 511 if (n_vertices) { 512 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 513 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 514 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 515 } 516 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 517 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 518 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 519 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 522 /* check saddle point solution */ 523 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 524 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 525 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 526 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 527 /* shift by the identity matrix */ 528 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 529 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 530 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 531 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 532 } 533 } 534 /* call assembling routines for local coarse basis */ 535 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 537 if (pcbddc->switch_static || pcbddc->dbg_flag) { 538 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 539 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 540 } 541 542 /* compute other basis functions for non-symmetric problems */ 543 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 544 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 545 PetscBool setsym; 546 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 547 if (!setsym) pcbddc->issym = PETSC_FALSE; 548 } 549 550 if (!pcbddc->issym) { 551 if (!pcbddc->coarse_psi_B) { 552 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 553 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 554 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 555 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 556 } 557 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 558 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 559 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 560 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 561 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 562 } 563 for (i=0;i<pcbddc->local_primal_size;i++) { 564 if (n_constraints) { 565 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 566 for (j=0;j<n_constraints;j++) { 567 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 568 } 569 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 570 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 571 } 572 if (i<n_vertices) { 573 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 574 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 575 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 576 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 577 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 578 if (n_constraints) { 579 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 580 } 581 } else { 582 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 583 } 584 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 585 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 586 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 587 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 588 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 589 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 590 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 591 if (i<n_vertices) { 592 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 593 } 594 if (pcbddc->switch_static || pcbddc->dbg_flag) { 595 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 596 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 597 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 598 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 599 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 600 } 601 602 if (pcbddc->dbg_flag) { 603 /* assemble subdomain vector on nodes */ 604 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 605 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 606 if (n_R) { 607 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 608 } 609 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 610 if (i<n_vertices) { 611 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 612 } 613 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 614 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 615 /* assemble subdomain vector of lagrange multipliers */ 616 for (j=0;j<pcbddc->local_primal_size;j++) { 617 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 618 } 619 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 620 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 621 /* check saddle point solution */ 622 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 623 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 624 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 625 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 626 /* shift by the identity matrix */ 627 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 628 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 629 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 630 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 631 } 632 } 633 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 634 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 635 if (pcbddc->switch_static || pcbddc->dbg_flag) { 636 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 637 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 638 } 639 unsymmetric_check = PETSC_TRUE; 640 } else { /* take references to already computed coarse basis */ 641 unsymmetric_check = PETSC_FALSE; 642 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 643 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 644 if (pcbddc->coarse_phi_D) { 645 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 646 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 647 } 648 } 649 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 650 /* Checking coarse_sub_mat and coarse basis functios */ 651 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 652 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 653 if (pcbddc->dbg_flag) { 654 Mat coarse_sub_mat; 655 Mat AUXMAT,TM1,TM2,TM3,TM4; 656 Mat coarse_phi_D,coarse_phi_B; 657 Mat coarse_psi_D,coarse_psi_B; 658 Mat A_II,A_BB,A_IB,A_BI; 659 MatType checkmattype=MATSEQAIJ; 660 PetscReal real_value; 661 662 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 663 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 664 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 665 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 666 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 667 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 668 if (unsymmetric_check) { 669 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 670 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 671 } 672 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 673 674 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 675 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 676 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 677 if (unsymmetric_check) { 678 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 679 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 680 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 681 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 682 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 683 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 684 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 685 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 686 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 687 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 688 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 689 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 690 } else { 691 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 692 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 693 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 694 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 695 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 696 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 697 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 698 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 699 } 700 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 701 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 702 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 703 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 704 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 705 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 706 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 710 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 711 for (i=0;i<pcbddc->local_primal_size;i++) { 712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 713 } 714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 715 for (i=0;i<pcbddc->local_primal_size;i++) { 716 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 717 } 718 if (unsymmetric_check) { 719 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 720 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 721 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 722 } 723 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 724 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 725 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 726 } 727 } 728 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 730 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 731 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 732 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 733 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 734 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 735 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 736 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 737 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 738 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 739 if (unsymmetric_check) { 740 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 741 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 742 } 743 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 744 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 745 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 746 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 747 } 748 /* free memory */ 749 if (n_vertices) { 750 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 751 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 752 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 753 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 754 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 755 } 756 if (n_constraints) { 757 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 758 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 759 ierr = MatDestroy(&M1);CHKERRQ(ierr); 760 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 761 } 762 ierr = PetscFree(auxindices);CHKERRQ(ierr); 763 /* get back data */ 764 *coarse_submat_vals_n = coarse_submat_vals; 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 770 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 771 { 772 PC_IS* pcis = (PC_IS*)(pc->data); 773 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 774 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 775 PetscBool issbaij,isseqaij; 776 /* manage repeated solves */ 777 MatReuse reuse; 778 PetscErrorCode ierr; 779 780 PetscFunctionBegin; 781 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 782 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 783 } 784 /* get mat flags */ 785 reuse = MAT_INITIAL_MATRIX; 786 if (pc->setupcalled) { 787 if (pc->flag == SAME_NONZERO_PATTERN) { 788 reuse = MAT_REUSE_MATRIX; 789 } else { 790 reuse = MAT_INITIAL_MATRIX; 791 } 792 } 793 if (reuse == MAT_INITIAL_MATRIX) { 794 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 795 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 796 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 797 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 798 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 799 } 800 801 /* transform local matrices if needed */ 802 if (pcbddc->ChangeOfBasisMatrix) { 803 Mat *change_mat_all; 804 IS is_local,is_global; 805 PetscBool sorted; 806 PetscInt *idxs_perm; 807 808 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr); 809 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 810 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 811 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 812 if (!sorted) { 813 const PetscInt *idxs; 814 PetscInt *idxs_sorted,i; 815 816 ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr); 817 ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr); 818 for (i=0;i<pcis->n;i++) { 819 idxs_perm[i] = i; 820 } 821 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 822 ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr); 823 for (i=0;i<pcis->n;i++) { 824 idxs_sorted[i] = idxs[idxs_perm[i]]; 825 } 826 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 827 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 828 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 829 } 830 831 /* get change of basis on the whole set of local dofs */ 832 ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 833 834 if (!sorted) { 835 Mat new_mat; 836 IS is_perm; 837 PetscInt *idxs,i; 838 839 ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr); 840 for (i=0;i<pcis->n;i++) { 841 idxs[idxs_perm[i]] = i; 842 } 843 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 844 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 845 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 846 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 847 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 848 change_mat_all[0] = new_mat; 849 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 850 } 851 852 /* check */ 853 if (pcbddc->dbg_flag) { 854 Vec x,x_change; 855 PetscReal error; 856 857 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 858 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 859 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 860 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 861 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 864 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 865 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 866 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 867 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 868 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 869 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 870 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 871 ierr = VecDestroy(&x);CHKERRQ(ierr); 872 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 873 } 874 875 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 876 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 877 if (isseqaij) { 878 ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 879 } else { 880 Mat work_mat; 881 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 882 ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 883 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 884 } 885 /* 886 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 887 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 888 */ 889 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 890 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 891 } else { 892 /* without change of basis, the local matrix is unchanged */ 893 if (!pcbddc->local_mat) { 894 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 895 pcbddc->local_mat = matis->A; 896 } 897 } 898 899 /* get submatrices */ 900 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 901 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 902 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 903 if (!issbaij) { 904 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 905 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 906 } else { 907 Mat newmat; 908 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 909 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 910 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 911 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } 915 916 #undef __FUNCT__ 917 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 918 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 919 { 920 PC_IS* pcis = (PC_IS*)(pc->data); 921 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 922 IS is_aux1,is_aux2; 923 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 924 PetscInt n_vertices,i,j,n_R,n_D,n_B; 925 PetscInt vbs,bs; 926 PetscBT bitmask; 927 PetscErrorCode ierr; 928 929 PetscFunctionBegin; 930 /* 931 No need to setup local scatters if 932 - primal space is unchanged 933 AND 934 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 935 AND 936 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 937 */ 938 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 939 PetscFunctionReturn(0); 940 } 941 /* destroy old objects */ 942 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 943 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 944 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 945 /* Set Non-overlapping dimensions */ 946 n_B = pcis->n_B; n_D = pcis->n - n_B; 947 n_vertices = pcbddc->n_actual_vertices; 948 /* create auxiliary bitmask */ 949 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 950 for (i=0;i<n_vertices;i++) { 951 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 952 } 953 954 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 955 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 956 for (i=0, n_R=0; i<pcis->n; i++) { 957 if (!PetscBTLookup(bitmask,i)) { 958 idx_R_local[n_R] = i; 959 n_R++; 960 } 961 } 962 963 /* Block code */ 964 vbs = 1; 965 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 966 if (bs>1 && !(n_vertices%bs)) { 967 PetscBool is_blocked = PETSC_TRUE; 968 PetscInt *vary; 969 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 970 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 971 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 972 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 973 for (i=0; i<n_vertices; i++) { 974 if (vary[i]!=0 && vary[i]!=bs) { 975 is_blocked = PETSC_FALSE; 976 break; 977 } 978 } 979 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 980 vbs = bs; 981 for (i=0;i<n_R/vbs;i++) { 982 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 983 } 984 } 985 ierr = PetscFree(vary);CHKERRQ(ierr); 986 } 987 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 988 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 989 990 /* print some info if requested */ 991 if (pcbddc->dbg_flag) { 992 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 993 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 994 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 998 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1000 } 1001 1002 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1003 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1004 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1005 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1006 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1007 for (i=0; i<n_D; i++) { 1008 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1009 } 1010 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1011 for (i=0, j=0; i<n_R; i++) { 1012 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1013 aux_array1[j++] = i; 1014 } 1015 } 1016 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1017 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1018 for (i=0, j=0; i<n_B; i++) { 1019 if (!PetscBTLookup(bitmask,is_indices[i])) { 1020 aux_array2[j++] = i; 1021 } 1022 } 1023 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1024 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1025 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1026 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1027 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1028 1029 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1030 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1031 for (i=0, j=0; i<n_R; i++) { 1032 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1033 aux_array1[j++] = i; 1034 } 1035 } 1036 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1037 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1038 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1039 } 1040 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1041 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1048 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1049 { 1050 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1051 PC_IS *pcis = (PC_IS*)pc->data; 1052 PC pc_temp; 1053 Mat A_RR; 1054 MatReuse reuse; 1055 PetscScalar m_one = -1.0; 1056 PetscReal value; 1057 PetscInt n_D,n_R,ibs,mbs; 1058 PetscBool use_exact,use_exact_reduced,issbaij; 1059 PetscErrorCode ierr; 1060 /* prefixes stuff */ 1061 char dir_prefix[256],neu_prefix[256],str_level[16]; 1062 size_t len; 1063 1064 PetscFunctionBegin; 1065 1066 /* compute prefixes */ 1067 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1068 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1069 if (!pcbddc->current_level) { 1070 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1071 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1072 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1073 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1074 } else { 1075 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1076 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1077 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1078 len -= 15; /* remove "pc_bddc_coarse_" */ 1079 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1080 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1081 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1082 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1083 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1084 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1085 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1086 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1087 } 1088 1089 /* DIRICHLET PROBLEM */ 1090 /* Matrix for Dirichlet problem is pcis->A_II */ 1091 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1092 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1093 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1094 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1095 /* default */ 1096 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1097 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1098 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1099 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1100 if (issbaij) { 1101 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1102 } else { 1103 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1104 } 1105 /* Allow user's customization */ 1106 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1107 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1108 } 1109 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1110 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1111 if (!n_D) { 1112 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1113 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1114 } 1115 /* Set Up KSP for Dirichlet problem of BDDC */ 1116 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1117 /* set ksp_D into pcis data */ 1118 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1119 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1120 pcis->ksp_D = pcbddc->ksp_D; 1121 1122 /* NEUMANN PROBLEM */ 1123 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1124 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1125 if (pcbddc->ksp_R) { /* already created ksp */ 1126 PetscInt nn_R; 1127 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1128 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1129 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1130 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1131 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1132 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1133 reuse = MAT_INITIAL_MATRIX; 1134 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1135 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1136 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1137 reuse = MAT_INITIAL_MATRIX; 1138 } else { /* safe to reuse the matrix */ 1139 reuse = MAT_REUSE_MATRIX; 1140 } 1141 } 1142 /* last check */ 1143 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1144 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1145 reuse = MAT_INITIAL_MATRIX; 1146 } 1147 } else { /* first time, so we need to create the matrix */ 1148 reuse = MAT_INITIAL_MATRIX; 1149 } 1150 /* extract A_RR */ 1151 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1152 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1153 if (ibs != mbs) { 1154 Mat newmat; 1155 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1156 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1157 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1158 } else { 1159 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1160 } 1161 if (!pcbddc->ksp_R) { /* create object if not present */ 1162 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1163 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1164 /* default */ 1165 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1166 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1167 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1168 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1169 if (issbaij) { 1170 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1171 } else { 1172 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1173 } 1174 /* Allow user's customization */ 1175 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1176 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1177 } 1178 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1179 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1180 if (!n_R) { 1181 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1182 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1183 } 1184 /* Set Up KSP for Neumann problem of BDDC */ 1185 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1186 1187 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1188 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1189 /* Dirichlet */ 1190 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1191 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1192 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1193 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1194 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1195 /* need to be adapted? */ 1196 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1197 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1198 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1199 /* print info */ 1200 if (pcbddc->dbg_flag) { 1201 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1202 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1203 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1204 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1205 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1206 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1207 } 1208 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1209 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1210 } 1211 1212 /* Neumann */ 1213 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1214 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1215 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1216 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1217 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1218 /* need to be adapted? */ 1219 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1220 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1221 /* print info */ 1222 if (pcbddc->dbg_flag) { 1223 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1224 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1225 } 1226 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1227 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1228 } 1229 } 1230 /* free Neumann problem's matrix */ 1231 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1237 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1238 { 1239 PetscErrorCode ierr; 1240 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1241 1242 PetscFunctionBegin; 1243 if (applytranspose) { 1244 if (pcbddc->local_auxmat1) { 1245 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1246 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1247 } 1248 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1249 } else { 1250 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1251 if (pcbddc->local_auxmat1) { 1252 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1253 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1254 } 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1262 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1263 { 1264 PetscErrorCode ierr; 1265 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1266 PC_IS* pcis = (PC_IS*) (pc->data); 1267 const PetscScalar zero = 0.0; 1268 1269 PetscFunctionBegin; 1270 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1271 if (applytranspose) { 1272 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1273 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1274 } else { 1275 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1276 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1277 } 1278 /* start communications from local primal nodes to rhs of coarse solver */ 1279 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1280 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1281 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1282 1283 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1284 /* TODO remove null space when doing multilevel */ 1285 if (pcbddc->coarse_ksp) { 1286 if (applytranspose) { 1287 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1288 } else { 1289 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1290 } 1291 } 1292 1293 /* Local solution on R nodes */ 1294 if (pcis->n) { 1295 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1296 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1297 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1298 if (pcbddc->switch_static) { 1299 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1300 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1301 } 1302 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1303 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1304 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1305 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1306 if (pcbddc->switch_static) { 1307 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1308 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1309 } 1310 } 1311 1312 /* communications from coarse sol to local primal nodes */ 1313 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1314 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1315 1316 /* Sum contributions from two levels */ 1317 if (applytranspose) { 1318 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1319 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1320 } else { 1321 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1322 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1323 } 1324 PetscFunctionReturn(0); 1325 } 1326 1327 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1330 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1331 { 1332 PetscErrorCode ierr; 1333 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1334 PetscScalar *array,*array2; 1335 Vec from,to; 1336 1337 PetscFunctionBegin; 1338 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1339 from = pcbddc->coarse_vec; 1340 to = pcbddc->vec1_P; 1341 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1342 Vec tvec; 1343 PetscInt lsize; 1344 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1345 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1346 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1347 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1348 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1349 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1350 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1351 } 1352 } else { /* from local to global -> put data in coarse right hand side */ 1353 from = pcbddc->vec1_P; 1354 to = pcbddc->coarse_vec; 1355 } 1356 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1362 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1363 { 1364 PetscErrorCode ierr; 1365 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1366 PetscScalar *array,*array2; 1367 Vec from,to; 1368 1369 PetscFunctionBegin; 1370 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1371 from = pcbddc->coarse_vec; 1372 to = pcbddc->vec1_P; 1373 } else { /* from local to global -> put data in coarse right hand side */ 1374 from = pcbddc->vec1_P; 1375 to = pcbddc->coarse_vec; 1376 } 1377 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1378 if (smode == SCATTER_FORWARD) { 1379 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1380 Vec tvec; 1381 PetscInt lsize; 1382 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1383 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1384 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1385 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1386 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1387 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1388 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1389 } 1390 } 1391 PetscFunctionReturn(0); 1392 } 1393 1394 /* uncomment for testing purposes */ 1395 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1398 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1399 { 1400 PetscErrorCode ierr; 1401 PC_IS* pcis = (PC_IS*)(pc->data); 1402 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1403 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1404 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1405 MatType impMatType=MATSEQAIJ; 1406 /* one and zero */ 1407 PetscScalar one=1.0,zero=0.0; 1408 /* space to store constraints and their local indices */ 1409 PetscScalar *temp_quadrature_constraint; 1410 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1411 /* iterators */ 1412 PetscInt i,j,k,total_counts,temp_start_ptr; 1413 /* stuff to store connected components stored in pcbddc->mat_graph */ 1414 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1415 PetscInt n_ISForFaces,n_ISForEdges; 1416 /* near null space stuff */ 1417 MatNullSpace nearnullsp; 1418 const Vec *nearnullvecs; 1419 Vec *localnearnullsp; 1420 PetscBool nnsp_has_cnst; 1421 PetscInt nnsp_size; 1422 PetscScalar *array; 1423 /* BLAS integers */ 1424 PetscBLASInt lwork,lierr; 1425 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1426 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1427 /* LAPACK working arrays for SVD or POD */ 1428 PetscBool skip_lapack; 1429 PetscScalar *work; 1430 PetscReal *singular_vals; 1431 #if defined(PETSC_USE_COMPLEX) 1432 PetscReal *rwork; 1433 #endif 1434 #if defined(PETSC_MISSING_LAPACK_GESVD) 1435 PetscBLASInt Blas_one_2=1; 1436 PetscScalar *temp_basis,*correlation_mat; 1437 #else 1438 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1439 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1440 #endif 1441 /* reuse */ 1442 PetscInt olocal_primal_size; 1443 PetscInt *oprimal_indices_local_idxs; 1444 /* change of basis */ 1445 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1446 PetscBool boolforchange,qr_needed; 1447 PetscBT touched,change_basis,qr_needed_idx; 1448 /* auxiliary stuff */ 1449 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1450 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1451 PetscScalar *temp_quadrature_constraint_work; 1452 /* some quantities */ 1453 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1454 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1455 1456 1457 PetscFunctionBegin; 1458 /* Destroy Mat objects computed previously */ 1459 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1460 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1461 /* Get index sets for faces, edges and vertices from graph */ 1462 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1463 /* free unneeded index sets */ 1464 if (!pcbddc->use_vertices) { 1465 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1466 } 1467 if (!pcbddc->use_edges) { 1468 for (i=0;i<n_ISForEdges;i++) { 1469 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1470 } 1471 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1472 n_ISForEdges = 0; 1473 } 1474 if (!pcbddc->use_faces) { 1475 for (i=0;i<n_ISForFaces;i++) { 1476 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1477 } 1478 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1479 n_ISForFaces = 0; 1480 } 1481 /* HACKS (the following two blocks of code) */ 1482 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1483 pcbddc->use_change_of_basis = PETSC_TRUE; 1484 if (!ISForEdges) { 1485 pcbddc->use_change_on_faces = PETSC_TRUE; 1486 } 1487 } 1488 if (pcbddc->NullSpace) { 1489 /* use_change_of_basis should be consistent among processors */ 1490 PetscBool tbool[2],gbool[2]; 1491 tbool [0] = pcbddc->use_change_of_basis; 1492 tbool [1] = pcbddc->use_change_on_faces; 1493 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1494 pcbddc->use_change_of_basis = gbool[0]; 1495 pcbddc->use_change_on_faces = gbool[1]; 1496 } 1497 /* print some info */ 1498 if (pcbddc->dbg_flag) { 1499 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1500 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1501 i = 0; 1502 if (ISForVertices) { 1503 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1504 } 1505 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1506 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1507 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1508 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1509 } 1510 /* check if near null space is attached to global mat */ 1511 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1512 if (nearnullsp) { 1513 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1514 /* remove any stored info */ 1515 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1516 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1517 /* store information for BDDC solver reuse */ 1518 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1519 pcbddc->onearnullspace = nearnullsp; 1520 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1521 for (i=0;i<nnsp_size;i++) { 1522 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1523 } 1524 } else { /* if near null space is not provided BDDC uses constants by default */ 1525 nnsp_size = 0; 1526 nnsp_has_cnst = PETSC_TRUE; 1527 } 1528 /* get max number of constraints on a single cc */ 1529 max_constraints = nnsp_size; 1530 if (nnsp_has_cnst) max_constraints++; 1531 1532 /* 1533 Evaluate maximum storage size needed by the procedure 1534 - temp_indices will contain start index of each constraint stored as follows 1535 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1536 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1537 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1538 */ 1539 total_counts = n_ISForFaces+n_ISForEdges; 1540 total_counts *= max_constraints; 1541 n_vertices = 0; 1542 if (ISForVertices) { 1543 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1544 } 1545 total_counts += n_vertices; 1546 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1547 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1548 total_counts = 0; 1549 max_size_of_constraint = 0; 1550 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1551 if (i<n_ISForEdges) { 1552 used_IS = &ISForEdges[i]; 1553 } else { 1554 used_IS = &ISForFaces[i-n_ISForEdges]; 1555 } 1556 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1557 total_counts += j; 1558 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1559 } 1560 total_counts *= max_constraints; 1561 total_counts += n_vertices; 1562 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1563 /* get local part of global near null space vectors */ 1564 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1565 for (k=0;k<nnsp_size;k++) { 1566 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1567 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 } 1570 1571 /* whether or not to skip lapack calls */ 1572 skip_lapack = PETSC_TRUE; 1573 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1574 1575 /* allocate some auxiliary stuff */ 1576 if (!skip_lapack || pcbddc->use_qr_single) { 1577 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1578 } 1579 1580 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1581 if (!skip_lapack) { 1582 PetscScalar temp_work; 1583 1584 #if defined(PETSC_MISSING_LAPACK_GESVD) 1585 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1586 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1587 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1588 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1589 #if defined(PETSC_USE_COMPLEX) 1590 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1591 #endif 1592 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1593 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1594 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1595 lwork = -1; 1596 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1597 #if !defined(PETSC_USE_COMPLEX) 1598 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1599 #else 1600 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1601 #endif 1602 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1603 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1604 #else /* on missing GESVD */ 1605 /* SVD */ 1606 PetscInt max_n,min_n; 1607 max_n = max_size_of_constraint; 1608 min_n = max_constraints; 1609 if (max_size_of_constraint < max_constraints) { 1610 min_n = max_size_of_constraint; 1611 max_n = max_constraints; 1612 } 1613 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1614 #if defined(PETSC_USE_COMPLEX) 1615 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1616 #endif 1617 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1618 lwork = -1; 1619 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1620 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1621 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1622 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1623 #if !defined(PETSC_USE_COMPLEX) 1624 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1625 #else 1626 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1627 #endif 1628 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1629 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1630 #endif /* on missing GESVD */ 1631 /* Allocate optimal workspace */ 1632 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1633 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1634 } 1635 /* Now we can loop on constraining sets */ 1636 total_counts = 0; 1637 temp_indices[0] = 0; 1638 /* vertices */ 1639 if (ISForVertices) { 1640 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1641 if (nnsp_has_cnst) { /* consider all vertices */ 1642 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1643 for (i=0;i<n_vertices;i++) { 1644 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1645 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1646 total_counts++; 1647 } 1648 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1649 PetscBool used_vertex; 1650 for (i=0;i<n_vertices;i++) { 1651 used_vertex = PETSC_FALSE; 1652 k = 0; 1653 while (!used_vertex && k<nnsp_size) { 1654 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1655 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1656 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1657 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1658 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1659 total_counts++; 1660 used_vertex = PETSC_TRUE; 1661 } 1662 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1663 k++; 1664 } 1665 } 1666 } 1667 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1668 n_vertices = total_counts; 1669 } 1670 1671 /* edges and faces */ 1672 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1673 if (ncc<n_ISForEdges) { 1674 used_IS = &ISForEdges[ncc]; 1675 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1676 } else { 1677 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1678 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1679 } 1680 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1681 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1682 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1683 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1684 /* change of basis should not be performed on local periodic nodes */ 1685 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1686 if (nnsp_has_cnst) { 1687 PetscScalar quad_value; 1688 temp_constraints++; 1689 if (!pcbddc->use_nnsp_true) { 1690 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1691 } else { 1692 quad_value = 1.0; 1693 } 1694 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1695 for (j=0;j<size_of_constraint;j++) { 1696 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1697 } 1698 /* sort by global ordering if using lapack subroutines */ 1699 if (!skip_lapack || pcbddc->use_qr_single) { 1700 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1701 for (j=0;j<size_of_constraint;j++) { 1702 permutation[j]=j; 1703 } 1704 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1705 for (j=0;j<size_of_constraint;j++) { 1706 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1707 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1708 } 1709 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1710 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1711 } 1712 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1713 total_counts++; 1714 } 1715 for (k=0;k<nnsp_size;k++) { 1716 PetscReal real_value; 1717 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1718 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1719 for (j=0;j<size_of_constraint;j++) { 1720 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1721 } 1722 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1723 /* check if array is null on the connected component */ 1724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1725 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1726 if (real_value > 0.0) { /* keep indices and values */ 1727 /* sort by global ordering if using lapack subroutines */ 1728 if (!skip_lapack || pcbddc->use_qr_single) { 1729 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1730 for (j=0;j<size_of_constraint;j++) { 1731 permutation[j]=j; 1732 } 1733 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1734 for (j=0;j<size_of_constraint;j++) { 1735 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1736 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1737 } 1738 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1739 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1740 } 1741 temp_constraints++; 1742 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1743 total_counts++; 1744 } 1745 } 1746 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1747 valid_constraints = temp_constraints; 1748 if (!pcbddc->use_nnsp_true && temp_constraints) { 1749 if (temp_constraints == 1) { /* just normalize the constraint */ 1750 PetscScalar norm; 1751 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1752 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1753 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1754 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1755 } else { /* perform SVD */ 1756 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1757 1758 #if defined(PETSC_MISSING_LAPACK_GESVD) 1759 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1760 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1761 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1762 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1763 from that computed using LAPACKgesvd 1764 -> This is due to a different computation of eigenvectors in LAPACKheev 1765 -> The quality of the POD-computed basis will be the same */ 1766 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1767 /* Store upper triangular part of correlation matrix */ 1768 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1769 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1770 for (j=0;j<temp_constraints;j++) { 1771 for (k=0;k<j+1;k++) { 1772 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1773 } 1774 } 1775 /* compute eigenvalues and eigenvectors of correlation matrix */ 1776 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1777 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1778 #if !defined(PETSC_USE_COMPLEX) 1779 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1780 #else 1781 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1782 #endif 1783 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1784 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1785 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1786 j = 0; 1787 while (j < temp_constraints && singular_vals[j] < tol) j++; 1788 total_counts = total_counts-j; 1789 valid_constraints = temp_constraints-j; 1790 /* scale and copy POD basis into used quadrature memory */ 1791 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1792 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1793 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1794 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1795 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1796 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1797 if (j<temp_constraints) { 1798 PetscInt ii; 1799 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1800 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1801 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1802 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1803 for (k=0;k<temp_constraints-j;k++) { 1804 for (ii=0;ii<size_of_constraint;ii++) { 1805 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1806 } 1807 } 1808 } 1809 #else /* on missing GESVD */ 1810 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1811 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1812 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1813 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1814 #if !defined(PETSC_USE_COMPLEX) 1815 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1816 #else 1817 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1818 #endif 1819 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1820 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1821 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1822 k = temp_constraints; 1823 if (k > size_of_constraint) k = size_of_constraint; 1824 j = 0; 1825 while (j < k && singular_vals[k-j-1] < tol) j++; 1826 valid_constraints = k-j; 1827 total_counts = total_counts-temp_constraints+valid_constraints; 1828 #endif /* on missing GESVD */ 1829 } 1830 } 1831 /* setting change_of_basis flag is safe now */ 1832 if (boolforchange) { 1833 for (j=0;j<valid_constraints;j++) { 1834 PetscBTSet(change_basis,total_counts-j-1); 1835 } 1836 } 1837 } 1838 /* free index sets of faces, edges and vertices */ 1839 for (i=0;i<n_ISForFaces;i++) { 1840 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1841 } 1842 if (n_ISForFaces) { 1843 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1844 } 1845 for (i=0;i<n_ISForEdges;i++) { 1846 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1847 } 1848 if (n_ISForEdges) { 1849 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1850 } 1851 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1852 /* map temp_indices_to_constraint in boundary numbering */ 1853 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1854 if (i != temp_indices[total_counts]) { 1855 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1856 } 1857 1858 /* free workspace */ 1859 if (!skip_lapack || pcbddc->use_qr_single) { 1860 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 1861 } 1862 if (!skip_lapack) { 1863 ierr = PetscFree(work);CHKERRQ(ierr); 1864 #if defined(PETSC_USE_COMPLEX) 1865 ierr = PetscFree(rwork);CHKERRQ(ierr); 1866 #endif 1867 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1868 #if defined(PETSC_MISSING_LAPACK_GESVD) 1869 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1870 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1871 #endif 1872 } 1873 for (k=0;k<nnsp_size;k++) { 1874 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1875 } 1876 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1877 1878 /* set quantities in pcbddc data structure and store previous primal size */ 1879 /* n_vertices defines the number of subdomain corners in the primal space */ 1880 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1881 olocal_primal_size = pcbddc->local_primal_size; 1882 pcbddc->local_primal_size = total_counts; 1883 pcbddc->n_vertices = n_vertices; 1884 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1885 1886 /* Create constraint matrix */ 1887 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1888 /* so we need to set it up properly either with or without change of basis */ 1889 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1890 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1891 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1892 /* array to compute a local numbering of constraints : vertices first then constraints */ 1893 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1894 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1895 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1896 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1897 /* auxiliary stuff for basis change */ 1898 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1899 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1900 1901 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1902 total_primal_vertices=0; 1903 for (i=0;i<pcbddc->local_primal_size;i++) { 1904 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1905 if (size_of_constraint == 1) { 1906 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1907 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1908 aux_primal_minloc[total_primal_vertices]=0; 1909 total_primal_vertices++; 1910 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1911 PetscInt min_loc,min_index; 1912 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1913 /* find first untouched local node */ 1914 k = 0; 1915 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1916 min_index = global_indices[k]; 1917 min_loc = k; 1918 /* search the minimum among global nodes already untouched on the cc */ 1919 for (k=1;k<size_of_constraint;k++) { 1920 /* there can be more than one constraint on a single connected component */ 1921 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1922 min_index = global_indices[k]; 1923 min_loc = k; 1924 } 1925 } 1926 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1927 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1928 aux_primal_minloc[total_primal_vertices]=min_loc; 1929 total_primal_vertices++; 1930 } 1931 } 1932 /* determine if a QR strategy is needed for change of basis */ 1933 qr_needed = PETSC_FALSE; 1934 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1935 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1936 if (PetscBTLookup(change_basis,i)) { 1937 if (!pcbddc->use_qr_single) { 1938 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1939 j = 0; 1940 for (k=0;k<size_of_constraint;k++) { 1941 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1942 j++; 1943 } 1944 } 1945 /* found more than one primal dof on the cc */ 1946 if (j > 1) { 1947 PetscBTSet(qr_needed_idx,i); 1948 qr_needed = PETSC_TRUE; 1949 } 1950 } else { 1951 PetscBTSet(qr_needed_idx,i); 1952 qr_needed = PETSC_TRUE; 1953 } 1954 } 1955 } 1956 /* free workspace */ 1957 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1958 1959 /* permute indices in order to have a sorted set of vertices */ 1960 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1961 1962 /* nonzero structure of constraint matrix */ 1963 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1964 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1965 j=total_primal_vertices; 1966 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1967 if (!PetscBTLookup(change_basis,i)) { 1968 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1969 j++; 1970 } 1971 } 1972 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1973 ierr = PetscFree(nnz);CHKERRQ(ierr); 1974 /* set values in constraint matrix */ 1975 for (i=0;i<total_primal_vertices;i++) { 1976 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1977 } 1978 total_counts = total_primal_vertices; 1979 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1980 if (!PetscBTLookup(change_basis,i)) { 1981 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1982 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1983 total_counts++; 1984 } 1985 } 1986 /* assembling */ 1987 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1989 /* 1990 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1991 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1992 */ 1993 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1994 if (pcbddc->use_change_of_basis) { 1995 /* dual and primal dofs on a single cc */ 1996 PetscInt dual_dofs,primal_dofs; 1997 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1998 PetscInt primal_counter; 1999 /* working stuff for GEQRF */ 2000 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2001 PetscBLASInt lqr_work; 2002 /* working stuff for UNGQR */ 2003 PetscScalar *gqr_work,lgqr_work_t; 2004 PetscBLASInt lgqr_work; 2005 /* working stuff for TRTRS */ 2006 PetscScalar *trs_rhs; 2007 PetscBLASInt Blas_NRHS; 2008 /* pointers for values insertion into change of basis matrix */ 2009 PetscInt *start_rows,*start_cols; 2010 PetscScalar *start_vals; 2011 /* working stuff for values insertion */ 2012 PetscBT is_primal; 2013 /* matrix sizes */ 2014 PetscInt global_size,local_size; 2015 /* work array for nonzeros */ 2016 PetscScalar *nnz_array; 2017 /* temporary change of basis */ 2018 Mat localChangeOfBasisMatrix; 2019 /* auxiliary work for global change of basis */ 2020 Vec nnz_vec; 2021 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 2022 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 2023 PetscScalar *vals; 2024 PetscBool done; 2025 2026 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2027 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2028 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2029 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2030 2031 /* nonzeros for local mat */ 2032 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 2033 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2034 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2035 if (PetscBTLookup(change_basis,i)) { 2036 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2037 if (PetscBTLookup(qr_needed_idx,i)) { 2038 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2039 } else { 2040 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 2041 /* get local primal index on the cc */ 2042 j = 0; 2043 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2044 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2045 } 2046 } 2047 } 2048 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2049 /* Set initial identity in the matrix */ 2050 for (i=0;i<pcis->n_B;i++) { 2051 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2052 } 2053 2054 if (pcbddc->dbg_flag) { 2055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2056 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2057 } 2058 2059 2060 /* Now we loop on the constraints which need a change of basis */ 2061 /* 2062 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2063 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2064 2065 Basic blocks of change of basis matrix T computed by 2066 2067 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2068 2069 | 1 0 ... 0 s_1/S | 2070 | 0 1 ... 0 s_2/S | 2071 | ... | 2072 | 0 ... 1 s_{n-1}/S | 2073 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2074 2075 with S = \sum_{i=1}^n s_i^2 2076 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2077 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2078 2079 - QR decomposition of constraints otherwise 2080 */ 2081 if (qr_needed) { 2082 /* space to store Q */ 2083 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2084 /* first we issue queries for optimal work */ 2085 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2087 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2088 lqr_work = -1; 2089 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2090 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2091 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2092 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2093 lgqr_work = -1; 2094 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2096 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2097 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2098 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2099 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2100 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2101 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2102 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2103 /* array to store scaling factors for reflectors */ 2104 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2105 /* array to store rhs and solution of triangular solver */ 2106 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2107 /* allocating workspace for check */ 2108 if (pcbddc->dbg_flag) { 2109 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2110 } 2111 } 2112 /* array to store whether a node is primal or not */ 2113 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2114 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2115 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2116 if (i != total_primal_vertices) { 2117 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2118 } 2119 for (i=0;i<total_primal_vertices;i++) { 2120 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2121 } 2122 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2123 2124 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2125 /* -> using implicit ordering contained in temp_indices data */ 2126 total_counts = pcbddc->n_vertices; 2127 primal_counter = total_counts; 2128 while (total_counts<pcbddc->local_primal_size) { 2129 primal_dofs = 1; 2130 if (PetscBTLookup(change_basis,total_counts)) { 2131 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2132 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2133 primal_dofs++; 2134 } 2135 /* get constraint info */ 2136 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2137 dual_dofs = size_of_constraint-primal_dofs; 2138 2139 if (pcbddc->dbg_flag) { 2140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2141 } 2142 2143 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2144 2145 /* copy quadrature constraints for change of basis check */ 2146 if (pcbddc->dbg_flag) { 2147 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2148 } 2149 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2150 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2151 2152 /* compute QR decomposition of constraints */ 2153 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2154 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2155 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2156 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2157 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2158 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2159 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2160 2161 /* explictly compute R^-T */ 2162 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2163 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2164 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2165 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2166 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2167 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2168 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2169 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2170 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2171 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2172 2173 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2174 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2175 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2176 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2177 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2178 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2179 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2180 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2181 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2182 2183 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2184 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2185 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2186 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2187 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2188 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2189 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2190 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2191 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2192 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2193 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2194 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2195 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2196 2197 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2198 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2199 /* insert cols for primal dofs */ 2200 for (j=0;j<primal_dofs;j++) { 2201 start_vals = &qr_basis[j*size_of_constraint]; 2202 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2203 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2204 } 2205 /* insert cols for dual dofs */ 2206 for (j=0,k=0;j<dual_dofs;k++) { 2207 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2208 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2209 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2210 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2211 j++; 2212 } 2213 } 2214 2215 /* check change of basis */ 2216 if (pcbddc->dbg_flag) { 2217 PetscInt ii,jj; 2218 PetscBool valid_qr=PETSC_TRUE; 2219 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2220 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2221 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2222 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2223 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2224 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2225 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2226 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2227 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2228 for (jj=0;jj<size_of_constraint;jj++) { 2229 for (ii=0;ii<primal_dofs;ii++) { 2230 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2231 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2232 } 2233 } 2234 if (!valid_qr) { 2235 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2236 for (jj=0;jj<size_of_constraint;jj++) { 2237 for (ii=0;ii<primal_dofs;ii++) { 2238 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2239 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2240 } 2241 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2242 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2243 } 2244 } 2245 } 2246 } else { 2247 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2248 } 2249 } 2250 } else { /* simple transformation block */ 2251 PetscInt row,col; 2252 PetscScalar val,norm; 2253 2254 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2255 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2256 for (j=0;j<size_of_constraint;j++) { 2257 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2258 if (!PetscBTLookup(is_primal,row)) { 2259 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2260 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2261 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2262 } else { 2263 for (k=0;k<size_of_constraint;k++) { 2264 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2265 if (row != col) { 2266 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2267 } else { 2268 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2269 } 2270 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2271 } 2272 } 2273 } 2274 if (pcbddc->dbg_flag) { 2275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2276 } 2277 } 2278 /* increment primal counter */ 2279 primal_counter += primal_dofs; 2280 } else { 2281 if (pcbddc->dbg_flag) { 2282 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2283 } 2284 } 2285 /* increment constraint counter total_counts */ 2286 total_counts += primal_dofs; 2287 } 2288 2289 /* free workspace */ 2290 if (qr_needed) { 2291 if (pcbddc->dbg_flag) { 2292 ierr = PetscFree(work);CHKERRQ(ierr); 2293 } 2294 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2295 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2296 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2297 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2298 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2299 } 2300 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2301 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2302 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2303 2304 /* assembling of global change of variable */ 2305 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2306 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2307 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2308 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2309 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2310 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2311 2312 /* nonzeros (overestimated) */ 2313 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2314 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2315 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2316 for (i=0;i<pcis->n;i++) { 2317 nnz_array[i] = 1.0; 2318 idxs_all[i] = i; 2319 } 2320 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2321 for (i=0;i<pcis->n_B;i++) { 2322 nnz_array[idxs_B[i]] = nnz[i]; 2323 } 2324 if (pcis->n) { 2325 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2326 } 2327 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2328 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2329 ierr = PetscFree(nnz);CHKERRQ(ierr); 2330 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2331 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2332 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2333 for (i=0;i<local_size;i++) { 2334 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2335 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2336 } 2337 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2338 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2339 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2340 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2341 2342 /* Set identity on dirichlet dofs */ 2343 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2344 for (i=0;i<pcis->n-pcis->n_B;i++) { 2345 PetscScalar one=1.0; 2346 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2347 } 2348 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2349 2350 /* Set values at interface dofs */ 2351 done = PETSC_TRUE; 2352 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2353 if (!done) { 2354 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2355 } 2356 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2357 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2358 ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2359 for (i=0;i<nvtxs;i++) { 2360 PetscInt row,*cols,ncols; 2361 PetscScalar *mat_vals; 2362 2363 row = idxs_B[i]; 2364 ncols = xadj[i+1]-xadj[i]; 2365 cols = idxs_mapped+xadj[i]; 2366 mat_vals = vals+xadj[i]; 2367 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2368 } 2369 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2370 if (!done) { 2371 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2372 } 2373 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2374 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2375 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2376 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2377 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2378 2379 /* check */ 2380 if (pcbddc->dbg_flag) { 2381 PetscReal error; 2382 Vec x,x_change; 2383 2384 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2385 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2386 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2387 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2388 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2389 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2390 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2391 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2392 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2393 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2394 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2395 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2396 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2397 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2398 ierr = VecDestroy(&x);CHKERRQ(ierr); 2399 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2400 } 2401 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2402 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2403 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2404 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2405 } 2406 2407 /* set up change of basis context */ 2408 if (pcbddc->ChangeOfBasisMatrix) { 2409 PCBDDCChange_ctx change_ctx; 2410 2411 if (!pcbddc->new_global_mat) { 2412 PetscInt global_size,local_size; 2413 2414 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2415 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2416 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2417 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2418 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2419 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2420 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2421 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2422 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2423 } else { 2424 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2425 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2426 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2427 } 2428 if (!pcbddc->user_ChangeOfBasisMatrix) { 2429 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2430 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2431 } else { 2432 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2433 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2434 } 2435 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2436 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2437 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2438 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2439 } 2440 2441 /* get indices in local ordering for vertices and constraints */ 2442 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2443 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2444 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2445 } 2446 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2447 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2448 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2449 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2450 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2451 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2452 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2453 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2454 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2455 /* set quantities in PCBDDC data struct */ 2456 pcbddc->n_actual_vertices = i; 2457 /* check if a new primal space has been introduced */ 2458 pcbddc->new_primal_space_local = PETSC_TRUE; 2459 if (olocal_primal_size == pcbddc->local_primal_size) { 2460 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2461 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2462 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2463 } 2464 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2465 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2466 2467 /* flush dbg viewer */ 2468 if (pcbddc->dbg_flag) { 2469 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2470 } 2471 2472 /* free workspace */ 2473 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2474 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2475 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2476 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2477 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2478 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2479 PetscFunctionReturn(0); 2480 } 2481 2482 #undef __FUNCT__ 2483 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2484 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2485 { 2486 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2487 PC_IS *pcis = (PC_IS*)pc->data; 2488 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2489 PetscInt ierr,i,vertex_size; 2490 PetscViewer viewer=pcbddc->dbg_viewer; 2491 2492 PetscFunctionBegin; 2493 /* Reset previously computed graph */ 2494 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2495 /* Init local Graph struct */ 2496 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2497 2498 /* Check validity of the csr graph passed in by the user */ 2499 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2500 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2501 } 2502 2503 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2504 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2505 Mat mat_adj; 2506 PetscInt *xadj,*adjncy; 2507 PetscInt nvtxs; 2508 PetscBool flg_row=PETSC_TRUE; 2509 2510 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2511 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2512 if (!flg_row) { 2513 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2514 } 2515 if (pcbddc->use_local_adj) { 2516 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2517 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2518 } else { /* just compute subdomain's connected components */ 2519 IS is_dummy; 2520 ISLocalToGlobalMapping l2gmap_dummy; 2521 PetscInt j,sum; 2522 PetscInt *cxadj,*cadjncy; 2523 const PetscInt *idxs; 2524 PCBDDCGraph graph; 2525 PetscBT is_on_boundary; 2526 2527 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2528 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2529 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2530 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2531 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2532 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2533 graph->xadj = xadj; 2534 graph->adjncy = adjncy; 2535 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2536 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2537 2538 if (pcbddc->dbg_flag) { 2539 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2540 for (i=0;i<graph->ncc;i++) { 2541 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2542 } 2543 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2544 } 2545 2546 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2547 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2548 for (i=0;i<pcis->n_B;i++) { 2549 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2550 } 2551 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2552 2553 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2554 sum = 0; 2555 for (i=0;i<graph->ncc;i++) { 2556 PetscInt sizecc = 0; 2557 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2558 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2559 sizecc++; 2560 } 2561 } 2562 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2563 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2564 cxadj[graph->queue[j]] = sizecc; 2565 } 2566 } 2567 sum += sizecc*sizecc; 2568 } 2569 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2570 sum = 0; 2571 for (i=0;i<nvtxs;i++) { 2572 PetscInt temp = cxadj[i]; 2573 cxadj[i] = sum; 2574 sum += temp; 2575 } 2576 cxadj[nvtxs] = sum; 2577 for (i=0;i<graph->ncc;i++) { 2578 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2579 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2580 PetscInt k,sizecc = 0; 2581 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2582 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2583 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2584 sizecc++; 2585 } 2586 } 2587 } 2588 } 2589 } 2590 if (nvtxs) { 2591 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2592 } else { 2593 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2594 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2595 } 2596 graph->xadj = 0; 2597 graph->adjncy = 0; 2598 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2599 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2600 } 2601 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2602 if (!flg_row) { 2603 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2604 } 2605 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2606 } 2607 2608 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2609 vertex_size = 1; 2610 if (pcbddc->user_provided_isfordofs) { 2611 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2612 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2613 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2614 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2615 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2616 } 2617 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2618 pcbddc->n_ISForDofs = 0; 2619 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2620 } 2621 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2622 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2623 } else { 2624 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2625 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2626 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2627 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2628 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2629 } 2630 } 2631 } 2632 2633 /* Setup of Graph */ 2634 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2635 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2636 } 2637 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2638 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2639 } 2640 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2641 2642 /* Graph's connected components analysis */ 2643 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2644 2645 /* print some info to stdout */ 2646 if (pcbddc->dbg_flag) { 2647 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2648 } 2649 2650 /* mark topography has done */ 2651 pcbddc->recompute_topography = PETSC_FALSE; 2652 PetscFunctionReturn(0); 2653 } 2654 2655 #undef __FUNCT__ 2656 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2657 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2658 { 2659 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2660 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2661 PetscErrorCode ierr; 2662 2663 PetscFunctionBegin; 2664 n = 0; 2665 vertices = 0; 2666 if (pcbddc->ConstraintMatrix) { 2667 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2668 for (i=0;i<local_primal_size;i++) { 2669 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2670 if (size_of_constraint == 1) n++; 2671 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2672 } 2673 if (vertices_idx) { 2674 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2675 n = 0; 2676 for (i=0;i<local_primal_size;i++) { 2677 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2678 if (size_of_constraint == 1) { 2679 vertices[n++]=row_cmat_indices[0]; 2680 } 2681 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2682 } 2683 } 2684 } 2685 *n_vertices = n; 2686 if (vertices_idx) *vertices_idx = vertices; 2687 PetscFunctionReturn(0); 2688 } 2689 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2692 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2693 { 2694 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2695 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2696 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2697 PetscBT touched; 2698 PetscErrorCode ierr; 2699 2700 /* This function assumes that the number of local constraints per connected component 2701 is not greater than the number of nodes defined for the connected component 2702 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2703 PetscFunctionBegin; 2704 n = 0; 2705 constraints_index = 0; 2706 if (pcbddc->ConstraintMatrix) { 2707 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2708 max_size_of_constraint = 0; 2709 for (i=0;i<local_primal_size;i++) { 2710 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2711 if (size_of_constraint > 1) { 2712 n++; 2713 } 2714 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2715 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2716 } 2717 if (constraints_idx) { 2718 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2719 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2720 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2721 n = 0; 2722 for (i=0;i<local_primal_size;i++) { 2723 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2724 if (size_of_constraint > 1) { 2725 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2726 /* find first untouched local node */ 2727 j = 0; 2728 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2729 min_index = row_cmat_global_indices[j]; 2730 min_loc = j; 2731 /* search the minimum among nodes not yet touched on the connected component 2732 since there can be more than one constraint on a single cc */ 2733 for (j=1;j<size_of_constraint;j++) { 2734 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2735 min_index = row_cmat_global_indices[j]; 2736 min_loc = j; 2737 } 2738 } 2739 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2740 constraints_index[n++] = row_cmat_indices[min_loc]; 2741 } 2742 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2743 } 2744 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2745 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2746 } 2747 } 2748 *n_constraints = n; 2749 if (constraints_idx) *constraints_idx = constraints_index; 2750 PetscFunctionReturn(0); 2751 } 2752 2753 #undef __FUNCT__ 2754 #define __FUNCT__ "PCBDDCSubsetNumbering" 2755 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2756 { 2757 Vec local_vec,global_vec; 2758 IS seqis,paris; 2759 VecScatter scatter_ctx; 2760 PetscScalar *array; 2761 PetscInt *temp_global_dofs; 2762 PetscScalar globalsum; 2763 PetscInt i,j,s; 2764 PetscInt nlocals,first_index,old_index,max_local; 2765 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2766 PetscMPIInt *dof_sizes,*dof_displs; 2767 PetscBool first_found; 2768 PetscErrorCode ierr; 2769 2770 PetscFunctionBegin; 2771 /* mpi buffers */ 2772 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2773 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2774 j = ( !rank_prec_comm ? size_prec_comm : 0); 2775 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2776 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2777 /* get maximum size of subset */ 2778 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2779 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2780 max_local = 0; 2781 for (i=0;i<n_local_dofs;i++) { 2782 if (max_local < temp_global_dofs[i] ) { 2783 max_local = temp_global_dofs[i]; 2784 } 2785 } 2786 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2787 max_global++; 2788 max_local = 0; 2789 for (i=0;i<n_local_dofs;i++) { 2790 if (max_local < local_dofs[i] ) { 2791 max_local = local_dofs[i]; 2792 } 2793 } 2794 max_local++; 2795 /* allocate workspace */ 2796 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2797 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2798 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2799 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2800 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2801 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2802 /* create scatter */ 2803 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2804 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2805 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2806 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2807 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2808 /* init array */ 2809 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2810 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2811 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2812 if (local_dofs_mult) { 2813 for (i=0;i<n_local_dofs;i++) { 2814 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2815 } 2816 } else { 2817 for (i=0;i<n_local_dofs;i++) { 2818 array[local_dofs[i]]=1.0; 2819 } 2820 } 2821 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2822 /* scatter into global vec and get total number of global dofs */ 2823 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2824 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2825 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2826 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2827 /* Fill global_vec with cumulative function for global numbering */ 2828 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2829 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2830 nlocals = 0; 2831 first_index = -1; 2832 first_found = PETSC_FALSE; 2833 for (i=0;i<s;i++) { 2834 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2835 first_found = PETSC_TRUE; 2836 first_index = i; 2837 } 2838 nlocals += (PetscInt)PetscRealPart(array[i]); 2839 } 2840 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2841 if (!rank_prec_comm) { 2842 dof_displs[0]=0; 2843 for (i=1;i<size_prec_comm;i++) { 2844 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2845 } 2846 } 2847 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2848 if (first_found) { 2849 array[first_index] += (PetscScalar)nlocals; 2850 old_index = first_index; 2851 for (i=first_index+1;i<s;i++) { 2852 if (PetscRealPart(array[i]) > 0.1) { 2853 array[i] += array[old_index]; 2854 old_index = i; 2855 } 2856 } 2857 } 2858 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2859 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2860 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2861 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2862 /* get global ordering of local dofs */ 2863 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2864 if (local_dofs_mult) { 2865 for (i=0;i<n_local_dofs;i++) { 2866 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2867 } 2868 } else { 2869 for (i=0;i<n_local_dofs;i++) { 2870 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2871 } 2872 } 2873 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2874 /* free workspace */ 2875 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2876 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2877 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2878 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2879 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2880 /* return pointer to global ordering of local dofs */ 2881 *global_numbering_subset = temp_global_dofs; 2882 PetscFunctionReturn(0); 2883 } 2884 2885 #undef __FUNCT__ 2886 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2887 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2888 { 2889 PetscInt i,j; 2890 PetscScalar *alphas; 2891 PetscErrorCode ierr; 2892 2893 PetscFunctionBegin; 2894 /* this implements stabilized Gram-Schmidt */ 2895 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2896 for (i=0;i<n;i++) { 2897 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2898 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2899 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2900 } 2901 ierr = PetscFree(alphas);CHKERRQ(ierr); 2902 PetscFunctionReturn(0); 2903 } 2904 2905 #undef __FUNCT__ 2906 #define __FUNCT__ "MatISGetSubassemblingPattern" 2907 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2908 { 2909 Mat subdomain_adj; 2910 IS new_ranks,ranks_send_to; 2911 MatPartitioning partitioner; 2912 Mat_IS *matis; 2913 PetscInt n_neighs,*neighs,*n_shared,**shared; 2914 PetscInt prank; 2915 PetscMPIInt size,rank,color; 2916 PetscInt *xadj,*adjncy,*oldranks; 2917 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2918 PetscInt i,local_size,threshold=0; 2919 PetscErrorCode ierr; 2920 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2921 PetscSubcomm subcomm; 2922 2923 PetscFunctionBegin; 2924 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2925 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2926 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2927 2928 /* Get info on mapping */ 2929 matis = (Mat_IS*)(mat->data); 2930 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2931 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2932 2933 /* build local CSR graph of subdomains' connectivity */ 2934 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2935 xadj[0] = 0; 2936 xadj[1] = PetscMax(n_neighs-1,0); 2937 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2938 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2939 2940 if (threshold) { 2941 PetscInt xadj_count = 0; 2942 for (i=1;i<n_neighs;i++) { 2943 if (n_shared[i] > threshold) { 2944 adjncy[xadj_count] = neighs[i]; 2945 adjncy_wgt[xadj_count] = n_shared[i]; 2946 xadj_count++; 2947 } 2948 } 2949 xadj[1] = xadj_count; 2950 } else { 2951 if (xadj[1]) { 2952 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2953 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2954 } 2955 } 2956 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2957 if (use_square) { 2958 for (i=0;i<xadj[1];i++) { 2959 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2960 } 2961 } 2962 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2963 2964 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 2965 2966 /* 2967 Restrict work on active processes only. 2968 */ 2969 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2970 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2971 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2972 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2973 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2974 if (color) { 2975 ierr = PetscFree(xadj);CHKERRQ(ierr); 2976 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2977 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2978 } else { 2979 PetscInt coarsening_ratio; 2980 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2981 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2982 prank = rank; 2983 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2984 /* 2985 for (i=0;i<size;i++) { 2986 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2987 } 2988 */ 2989 for (i=0;i<xadj[1];i++) { 2990 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2991 } 2992 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2993 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2994 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2995 2996 /* Partition */ 2997 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2998 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2999 if (use_vwgt) { 3000 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3001 v_wgt[0] = local_size; 3002 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3003 } 3004 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3005 coarsening_ratio = size/n_subdomains; 3006 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3007 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3008 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3009 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3010 3011 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3012 if (contiguous) { 3013 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3014 } else { 3015 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3016 } 3017 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3018 /* clean up */ 3019 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3020 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3021 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3022 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3023 } 3024 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3025 3026 /* assemble parallel IS for sends */ 3027 i = 1; 3028 if (color) i=0; 3029 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3030 3031 /* get back IS */ 3032 *is_sends = ranks_send_to; 3033 PetscFunctionReturn(0); 3034 } 3035 3036 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3037 3038 #undef __FUNCT__ 3039 #define __FUNCT__ "MatISSubassemble" 3040 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3041 { 3042 Mat local_mat; 3043 Mat_IS *matis; 3044 IS is_sends_internal; 3045 PetscInt rows,cols,new_local_rows; 3046 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3047 PetscBool ismatis,isdense,newisdense,destroy_mat; 3048 ISLocalToGlobalMapping l2gmap; 3049 PetscInt* l2gmap_indices; 3050 const PetscInt* is_indices; 3051 MatType new_local_type; 3052 /* buffers */ 3053 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3054 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3055 PetscInt *recv_buffer_idxs_local; 3056 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3057 /* MPI */ 3058 MPI_Comm comm,comm_n; 3059 PetscSubcomm subcomm; 3060 PetscMPIInt n_sends,n_recvs,commsize; 3061 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3062 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3063 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3064 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3065 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3066 PetscErrorCode ierr; 3067 3068 PetscFunctionBegin; 3069 /* TODO: add missing checks */ 3070 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3071 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3072 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3073 PetscValidLogicalCollectiveInt(mat,nis,7); 3074 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3075 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3076 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3077 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3078 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3079 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3080 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3081 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3082 PetscInt mrows,mcols,mnrows,mncols; 3083 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3084 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3085 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3086 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3087 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3088 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3089 } 3090 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3091 PetscValidLogicalCollectiveInt(mat,bs,0); 3092 /* prepare IS for sending if not provided */ 3093 if (!is_sends) { 3094 PetscBool pcontig = PETSC_TRUE; 3095 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3096 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3097 } else { 3098 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3099 is_sends_internal = is_sends; 3100 } 3101 3102 /* get pointer of MATIS data */ 3103 matis = (Mat_IS*)mat->data; 3104 3105 /* get comm */ 3106 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3107 3108 /* compute number of sends */ 3109 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3110 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3111 3112 /* compute number of receives */ 3113 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3114 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3115 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3116 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3117 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3118 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3119 ierr = PetscFree(iflags);CHKERRQ(ierr); 3120 3121 /* restrict comm if requested */ 3122 subcomm = 0; 3123 destroy_mat = PETSC_FALSE; 3124 if (restrict_comm) { 3125 PetscMPIInt color,rank,subcommsize; 3126 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3127 color = 0; 3128 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3129 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3130 subcommsize = commsize - subcommsize; 3131 /* check if reuse has been requested */ 3132 if (reuse == MAT_REUSE_MATRIX) { 3133 if (*mat_n) { 3134 PetscMPIInt subcommsize2; 3135 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3136 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3137 comm_n = PetscObjectComm((PetscObject)*mat_n); 3138 } else { 3139 comm_n = PETSC_COMM_SELF; 3140 } 3141 } else { /* MAT_INITIAL_MATRIX */ 3142 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3143 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3144 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3145 comm_n = subcomm->comm; 3146 } 3147 /* flag to destroy *mat_n if not significative */ 3148 if (color) destroy_mat = PETSC_TRUE; 3149 } else { 3150 comm_n = comm; 3151 } 3152 3153 /* prepare send/receive buffers */ 3154 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3155 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3156 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3157 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3158 if (nis) { 3159 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3160 } 3161 3162 /* Get data from local matrices */ 3163 if (!isdense) { 3164 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3165 /* TODO: See below some guidelines on how to prepare the local buffers */ 3166 /* 3167 send_buffer_vals should contain the raw values of the local matrix 3168 send_buffer_idxs should contain: 3169 - MatType_PRIVATE type 3170 - PetscInt size_of_l2gmap 3171 - PetscInt global_row_indices[size_of_l2gmap] 3172 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3173 */ 3174 } else { 3175 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3176 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3177 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3178 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3179 send_buffer_idxs[1] = i; 3180 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3181 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3182 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3183 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3184 for (i=0;i<n_sends;i++) { 3185 ilengths_vals[is_indices[i]] = len*len; 3186 ilengths_idxs[is_indices[i]] = len+2; 3187 } 3188 } 3189 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3190 /* additional is (if any) */ 3191 if (nis) { 3192 PetscMPIInt psum; 3193 PetscInt j; 3194 for (j=0,psum=0;j<nis;j++) { 3195 PetscInt plen; 3196 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3197 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3198 psum += len+1; /* indices + lenght */ 3199 } 3200 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3201 for (j=0,psum=0;j<nis;j++) { 3202 PetscInt plen; 3203 const PetscInt *is_array_idxs; 3204 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3205 send_buffer_idxs_is[psum] = plen; 3206 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3207 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3208 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3209 psum += plen+1; /* indices + lenght */ 3210 } 3211 for (i=0;i<n_sends;i++) { 3212 ilengths_idxs_is[is_indices[i]] = psum; 3213 } 3214 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3215 } 3216 3217 buf_size_idxs = 0; 3218 buf_size_vals = 0; 3219 buf_size_idxs_is = 0; 3220 for (i=0;i<n_recvs;i++) { 3221 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3222 buf_size_vals += (PetscInt)olengths_vals[i]; 3223 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3224 } 3225 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3226 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3227 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3228 3229 /* get new tags for clean communications */ 3230 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3231 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3232 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3233 3234 /* allocate for requests */ 3235 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3236 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3237 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3238 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3239 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3240 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3241 3242 /* communications */ 3243 ptr_idxs = recv_buffer_idxs; 3244 ptr_vals = recv_buffer_vals; 3245 ptr_idxs_is = recv_buffer_idxs_is; 3246 for (i=0;i<n_recvs;i++) { 3247 source_dest = onodes[i]; 3248 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3249 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3250 ptr_idxs += olengths_idxs[i]; 3251 ptr_vals += olengths_vals[i]; 3252 if (nis) { 3253 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3254 ptr_idxs_is += olengths_idxs_is[i]; 3255 } 3256 } 3257 for (i=0;i<n_sends;i++) { 3258 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3259 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3260 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3261 if (nis) { 3262 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3263 } 3264 } 3265 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3266 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3267 3268 /* assemble new l2g map */ 3269 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3270 ptr_idxs = recv_buffer_idxs; 3271 new_local_rows = 0; 3272 for (i=0;i<n_recvs;i++) { 3273 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3274 ptr_idxs += olengths_idxs[i]; 3275 } 3276 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3277 ptr_idxs = recv_buffer_idxs; 3278 new_local_rows = 0; 3279 for (i=0;i<n_recvs;i++) { 3280 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3281 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3282 ptr_idxs += olengths_idxs[i]; 3283 } 3284 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3285 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3286 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3287 3288 /* infer new local matrix type from received local matrices type */ 3289 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3290 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3291 if (n_recvs) { 3292 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3293 ptr_idxs = recv_buffer_idxs; 3294 for (i=0;i<n_recvs;i++) { 3295 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3296 new_local_type_private = MATAIJ_PRIVATE; 3297 break; 3298 } 3299 ptr_idxs += olengths_idxs[i]; 3300 } 3301 switch (new_local_type_private) { 3302 case MATDENSE_PRIVATE: 3303 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3304 new_local_type = MATSEQAIJ; 3305 bs = 1; 3306 } else { /* if I receive only 1 dense matrix */ 3307 new_local_type = MATSEQDENSE; 3308 bs = 1; 3309 } 3310 break; 3311 case MATAIJ_PRIVATE: 3312 new_local_type = MATSEQAIJ; 3313 bs = 1; 3314 break; 3315 case MATBAIJ_PRIVATE: 3316 new_local_type = MATSEQBAIJ; 3317 break; 3318 case MATSBAIJ_PRIVATE: 3319 new_local_type = MATSEQSBAIJ; 3320 break; 3321 default: 3322 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3323 break; 3324 } 3325 } else { /* by default, new_local_type is seqdense */ 3326 new_local_type = MATSEQDENSE; 3327 bs = 1; 3328 } 3329 3330 /* create MATIS object if needed */ 3331 if (reuse == MAT_INITIAL_MATRIX) { 3332 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3333 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3334 } else { 3335 /* it also destroys the local matrices */ 3336 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3337 } 3338 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3339 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3340 3341 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3342 3343 /* Global to local map of received indices */ 3344 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3345 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3346 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3347 3348 /* restore attributes -> type of incoming data and its size */ 3349 buf_size_idxs = 0; 3350 for (i=0;i<n_recvs;i++) { 3351 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3352 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3353 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3354 } 3355 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3356 3357 /* set preallocation */ 3358 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3359 if (!newisdense) { 3360 PetscInt *new_local_nnz=0; 3361 3362 ptr_vals = recv_buffer_vals; 3363 ptr_idxs = recv_buffer_idxs_local; 3364 if (n_recvs) { 3365 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3366 } 3367 for (i=0;i<n_recvs;i++) { 3368 PetscInt j; 3369 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3370 for (j=0;j<*(ptr_idxs+1);j++) { 3371 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3372 } 3373 } else { 3374 /* TODO */ 3375 } 3376 ptr_idxs += olengths_idxs[i]; 3377 } 3378 if (new_local_nnz) { 3379 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3380 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3381 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3382 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3383 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3384 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3385 } else { 3386 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3387 } 3388 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3389 } else { 3390 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3391 } 3392 3393 /* set values */ 3394 ptr_vals = recv_buffer_vals; 3395 ptr_idxs = recv_buffer_idxs_local; 3396 for (i=0;i<n_recvs;i++) { 3397 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3398 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3399 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3400 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3401 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3402 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3403 } else { 3404 /* TODO */ 3405 } 3406 ptr_idxs += olengths_idxs[i]; 3407 ptr_vals += olengths_vals[i]; 3408 } 3409 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3410 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3411 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3412 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3413 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3414 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3415 3416 #if 0 3417 if (!restrict_comm) { /* check */ 3418 Vec lvec,rvec; 3419 PetscReal infty_error; 3420 3421 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3422 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3423 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3424 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3425 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3426 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3427 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3428 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3429 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3430 } 3431 #endif 3432 3433 /* assemble new additional is (if any) */ 3434 if (nis) { 3435 PetscInt **temp_idxs,*count_is,j,psum; 3436 3437 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3438 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3439 ptr_idxs = recv_buffer_idxs_is; 3440 psum = 0; 3441 for (i=0;i<n_recvs;i++) { 3442 for (j=0;j<nis;j++) { 3443 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3444 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3445 psum += plen; 3446 ptr_idxs += plen+1; /* shift pointer to received data */ 3447 } 3448 } 3449 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3450 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3451 for (i=1;i<nis;i++) { 3452 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3453 } 3454 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3455 ptr_idxs = recv_buffer_idxs_is; 3456 for (i=0;i<n_recvs;i++) { 3457 for (j=0;j<nis;j++) { 3458 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3459 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3460 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3461 ptr_idxs += plen+1; /* shift pointer to received data */ 3462 } 3463 } 3464 for (i=0;i<nis;i++) { 3465 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3466 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3467 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3468 } 3469 ierr = PetscFree(count_is);CHKERRQ(ierr); 3470 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3471 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3472 } 3473 /* free workspace */ 3474 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3475 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3476 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3477 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3478 if (isdense) { 3479 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3480 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3481 } else { 3482 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3483 } 3484 if (nis) { 3485 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3486 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3487 } 3488 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3489 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3490 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3491 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3492 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3493 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3494 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3495 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3496 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3497 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3498 ierr = PetscFree(onodes);CHKERRQ(ierr); 3499 if (nis) { 3500 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3501 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3502 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3503 } 3504 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3505 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3506 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3507 for (i=0;i<nis;i++) { 3508 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3509 } 3510 } 3511 PetscFunctionReturn(0); 3512 } 3513 3514 /* temporary hack into ksp private data structure */ 3515 #include <petsc-private/kspimpl.h> 3516 3517 #undef __FUNCT__ 3518 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3519 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3520 { 3521 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3522 PC_IS *pcis = (PC_IS*)pc->data; 3523 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3524 MatNullSpace CoarseNullSpace=NULL; 3525 ISLocalToGlobalMapping coarse_islg; 3526 IS coarse_is,*isarray; 3527 PetscInt i,im_active=-1,active_procs=-1; 3528 PetscInt nis,nisdofs,nisneu; 3529 PC pc_temp; 3530 PCType coarse_pc_type; 3531 KSPType coarse_ksp_type; 3532 PetscBool multilevel_requested,multilevel_allowed; 3533 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3534 Mat t_coarse_mat_is; 3535 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3536 PetscMPIInt all_procs; 3537 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3538 PetscBool compute_vecs = PETSC_FALSE; 3539 PetscScalar *array; 3540 PetscErrorCode ierr; 3541 3542 PetscFunctionBegin; 3543 /* Assign global numbering to coarse dofs */ 3544 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3545 compute_vecs = PETSC_TRUE; 3546 PetscInt ocoarse_size; 3547 ocoarse_size = pcbddc->coarse_size; 3548 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3549 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3550 /* see if we can avoid some work */ 3551 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3552 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3553 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3554 coarse_reuse = PETSC_FALSE; 3555 } else { /* we can safely reuse already computed coarse matrix */ 3556 coarse_reuse = PETSC_TRUE; 3557 } 3558 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3559 coarse_reuse = PETSC_FALSE; 3560 } 3561 /* reset any subassembling information */ 3562 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3563 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3564 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3565 coarse_reuse = PETSC_TRUE; 3566 } 3567 3568 /* count "active" (i.e. with positive local size) and "void" processes */ 3569 im_active = !!(pcis->n); 3570 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3571 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3572 void_procs = all_procs-active_procs; 3573 csin_type_simple = PETSC_TRUE; 3574 redist = PETSC_FALSE; 3575 if (pcbddc->current_level && void_procs) { 3576 csin_ml = PETSC_TRUE; 3577 ncoarse_ml = void_procs; 3578 csin_ds = PETSC_TRUE; 3579 ncoarse_ds = void_procs; 3580 } else { 3581 csin_ml = PETSC_FALSE; 3582 ncoarse_ml = all_procs; 3583 if (void_procs) { 3584 csin_ds = PETSC_TRUE; 3585 ncoarse_ds = void_procs; 3586 csin_type_simple = PETSC_FALSE; 3587 } else { 3588 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3589 csin_ds = PETSC_TRUE; 3590 ncoarse_ds = pcbddc->redistribute_coarse; 3591 redist = PETSC_TRUE; 3592 } else { 3593 csin_ds = PETSC_FALSE; 3594 ncoarse_ds = all_procs; 3595 } 3596 } 3597 } 3598 3599 /* 3600 test if we can go multilevel: three conditions must be satisfied: 3601 - we have not exceeded the number of levels requested 3602 - we can actually subassemble the active processes 3603 - we can find a suitable number of MPI processes where we can place the subassembled problem 3604 */ 3605 multilevel_allowed = PETSC_FALSE; 3606 multilevel_requested = PETSC_FALSE; 3607 if (pcbddc->current_level < pcbddc->max_levels) { 3608 multilevel_requested = PETSC_TRUE; 3609 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3610 multilevel_allowed = PETSC_FALSE; 3611 } else { 3612 multilevel_allowed = PETSC_TRUE; 3613 } 3614 } 3615 /* determine number of process partecipating to coarse solver */ 3616 if (multilevel_allowed) { 3617 ncoarse = ncoarse_ml; 3618 csin = csin_ml; 3619 } else { 3620 ncoarse = ncoarse_ds; 3621 csin = csin_ds; 3622 } 3623 3624 /* creates temporary l2gmap and IS for coarse indexes */ 3625 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3626 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3627 3628 /* creates temporary MATIS object for coarse matrix */ 3629 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3630 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3631 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3632 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3633 #if 0 3634 { 3635 PetscViewer viewer; 3636 char filename[256]; 3637 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3638 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3639 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3640 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3641 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3642 } 3643 #endif 3644 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3645 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3646 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3647 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3648 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3649 3650 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3651 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3652 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3653 const PetscInt *idxs; 3654 ISLocalToGlobalMapping tmap; 3655 3656 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3657 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3658 /* allocate space for temporary storage */ 3659 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3660 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3661 /* allocate for IS array */ 3662 nisdofs = pcbddc->n_ISForDofsLocal; 3663 nisneu = !!pcbddc->NeumannBoundariesLocal; 3664 nis = nisdofs + nisneu; 3665 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3666 /* dofs splitting */ 3667 for (i=0;i<nisdofs;i++) { 3668 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3669 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3670 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3671 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3672 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3673 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3674 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3675 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3676 } 3677 /* neumann boundaries */ 3678 if (pcbddc->NeumannBoundariesLocal) { 3679 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3680 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3681 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3682 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3683 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3684 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3685 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3686 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3687 } 3688 /* free memory */ 3689 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3690 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3691 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3692 } else { 3693 nis = 0; 3694 nisdofs = 0; 3695 nisneu = 0; 3696 isarray = NULL; 3697 } 3698 /* destroy no longer needed map */ 3699 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3700 3701 /* restrict on coarse candidates (if needed) */ 3702 coarse_mat_is = NULL; 3703 if (csin) { 3704 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3705 if (redist) { 3706 PetscMPIInt rank; 3707 PetscInt spc,n_spc_p1,dest[1]; 3708 3709 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3710 spc = all_procs/pcbddc->redistribute_coarse; 3711 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 3712 if (rank > n_spc_p1*(spc+1)-1) { 3713 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3714 } else { 3715 dest[0] = rank/(spc+1); 3716 } 3717 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3718 } else { 3719 PetscInt j,tissize,*nisindices; 3720 PetscInt *coarse_candidates; 3721 const PetscInt* tisindices; 3722 /* get coarse candidates' ranks in pc communicator */ 3723 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 3724 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3725 for (i=0,j=0;i<all_procs;i++) { 3726 if (!coarse_candidates[i]) { 3727 coarse_candidates[j]=i; 3728 j++; 3729 } 3730 } 3731 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3732 /* get a suitable subassembling pattern */ 3733 if (csin_type_simple) { 3734 PetscMPIInt rank; 3735 PetscInt issize,isidx; 3736 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3737 if (im_active) { 3738 issize = 1; 3739 isidx = (PetscInt)rank; 3740 } else { 3741 issize = 0; 3742 isidx = -1; 3743 } 3744 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3745 } else { 3746 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3747 } 3748 if (pcbddc->dbg_flag) { 3749 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3750 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3751 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3752 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3753 for (i=0;i<j;i++) { 3754 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3755 } 3756 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3757 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3758 } 3759 /* shift the pattern on coarse candidates */ 3760 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3761 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3762 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 3763 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3764 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3765 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3766 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3767 } 3768 } 3769 if (pcbddc->dbg_flag) { 3770 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3771 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3772 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3773 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3774 } 3775 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3776 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3777 } else { 3778 if (pcbddc->dbg_flag) { 3779 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3780 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3781 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3782 } 3783 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3784 coarse_mat_is = t_coarse_mat_is; 3785 } 3786 3787 /* create local to global scatters for coarse problem */ 3788 if (compute_vecs) { 3789 PetscInt lrows; 3790 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3791 if (coarse_mat_is) { 3792 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3793 } else { 3794 lrows = 0; 3795 } 3796 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3797 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3798 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3799 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3800 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3801 } 3802 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3803 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3804 3805 /* set defaults for coarse KSP and PC */ 3806 if (multilevel_allowed) { 3807 coarse_ksp_type = KSPRICHARDSON; 3808 coarse_pc_type = PCBDDC; 3809 } else { 3810 coarse_ksp_type = KSPPREONLY; 3811 coarse_pc_type = PCREDUNDANT; 3812 } 3813 3814 /* print some info if requested */ 3815 if (pcbddc->dbg_flag) { 3816 if (!multilevel_allowed) { 3817 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3818 if (multilevel_requested) { 3819 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3820 } else if (pcbddc->max_levels) { 3821 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3822 } 3823 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3824 } 3825 } 3826 3827 /* create the coarse KSP object only once with defaults */ 3828 if (coarse_mat_is) { 3829 MatReuse coarse_mat_reuse; 3830 PetscViewer dbg_viewer = NULL; 3831 if (pcbddc->dbg_flag) { 3832 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3833 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3834 } 3835 if (!pcbddc->coarse_ksp) { 3836 char prefix[256],str_level[16]; 3837 size_t len; 3838 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3839 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3840 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3841 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3842 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3843 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3844 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3845 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3846 /* prefix */ 3847 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3848 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3849 if (!pcbddc->current_level) { 3850 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3851 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3852 } else { 3853 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3854 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3855 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3856 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3857 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3858 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3859 } 3860 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3861 /* allow user customization */ 3862 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3863 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3864 } 3865 3866 /* get some info after set from options */ 3867 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3868 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3869 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3870 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3871 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3872 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3873 isbddc = PETSC_FALSE; 3874 } 3875 if (isredundant) { 3876 KSP inner_ksp; 3877 PC inner_pc; 3878 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3879 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3880 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3881 } 3882 3883 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3884 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3885 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3886 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3887 if (nisdofs) { 3888 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3889 for (i=0;i<nisdofs;i++) { 3890 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3891 } 3892 } 3893 if (nisneu) { 3894 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3895 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3896 } 3897 3898 /* assemble coarse matrix */ 3899 if (coarse_reuse) { 3900 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3901 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3902 coarse_mat_reuse = MAT_REUSE_MATRIX; 3903 } else { 3904 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3905 } 3906 if (isbddc || isnn) { 3907 if (pcbddc->coarsening_ratio > 1) { 3908 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3909 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3910 if (pcbddc->dbg_flag) { 3911 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3912 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3913 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3914 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3915 } 3916 } 3917 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3918 } else { 3919 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 3920 coarse_mat = coarse_mat_is; 3921 } 3922 } else { 3923 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3924 } 3925 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3926 3927 /* propagate symmetry info to coarse matrix */ 3928 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3929 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3930 3931 /* set operators */ 3932 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3933 if (pcbddc->dbg_flag) { 3934 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3935 } 3936 } else { /* processes non partecipating to coarse solver (if any) */ 3937 coarse_mat = 0; 3938 } 3939 ierr = PetscFree(isarray);CHKERRQ(ierr); 3940 #if 0 3941 { 3942 PetscViewer viewer; 3943 char filename[256]; 3944 sprintf(filename,"coarse_mat.m"); 3945 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3946 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3947 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3948 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3949 } 3950 #endif 3951 3952 /* Compute coarse null space (special handling by BDDC only) */ 3953 if (pcbddc->NullSpace) { 3954 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3955 } 3956 3957 if (pcbddc->coarse_ksp) { 3958 Vec crhs,csol; 3959 PetscBool ispreonly; 3960 if (CoarseNullSpace) { 3961 if (isbddc) { 3962 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3963 } else { 3964 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3965 } 3966 } 3967 /* setup coarse ksp */ 3968 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3969 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3970 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3971 /* hack */ 3972 if (!csol) { 3973 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3974 } 3975 if (!crhs) { 3976 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3977 } 3978 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3979 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3980 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3981 KSP check_ksp; 3982 KSPType check_ksp_type; 3983 PC check_pc; 3984 Vec check_vec,coarse_vec; 3985 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3986 PetscInt its; 3987 PetscBool compute_eigs; 3988 PetscReal *eigs_r,*eigs_c; 3989 PetscInt neigs; 3990 const char *prefix; 3991 3992 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3993 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3994 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3995 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3996 if (ispreonly) { 3997 check_ksp_type = KSPPREONLY; 3998 compute_eigs = PETSC_FALSE; 3999 } else { 4000 check_ksp_type = KSPGMRES; 4001 compute_eigs = PETSC_TRUE; 4002 } 4003 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4004 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4005 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4006 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4007 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4008 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4009 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4010 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4011 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4012 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4013 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4014 /* create random vec */ 4015 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4016 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4017 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4018 if (CoarseNullSpace) { 4019 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4020 } 4021 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4022 /* solve coarse problem */ 4023 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4024 if (CoarseNullSpace) { 4025 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4026 } 4027 /* set eigenvalue estimation if preonly has not been requested */ 4028 if (compute_eigs) { 4029 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4030 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4031 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4032 lambda_max = eigs_r[neigs-1]; 4033 lambda_min = eigs_r[0]; 4034 if (pcbddc->use_coarse_estimates) { 4035 if (lambda_max>lambda_min) { 4036 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4037 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4038 } 4039 } 4040 } 4041 4042 /* check coarse problem residual error */ 4043 if (pcbddc->dbg_flag) { 4044 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4045 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4046 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4047 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4048 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4049 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4050 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4051 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4052 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4053 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4054 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4055 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4056 if (compute_eigs) { 4057 PetscReal lambda_max_s,lambda_min_s; 4058 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4059 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4060 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4061 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4062 for (i=0;i<neigs;i++) { 4063 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4064 } 4065 } 4066 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4067 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4068 } 4069 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4070 if (compute_eigs) { 4071 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4072 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4073 } 4074 } 4075 } 4076 /* print additional info */ 4077 if (pcbddc->dbg_flag) { 4078 /* waits until all processes reaches this point */ 4079 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4080 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4081 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4082 } 4083 4084 /* free memory */ 4085 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4086 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4087 PetscFunctionReturn(0); 4088 } 4089 4090 #undef __FUNCT__ 4091 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4092 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4093 { 4094 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4095 PC_IS* pcis = (PC_IS*)pc->data; 4096 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4097 PetscInt i,coarse_size; 4098 PetscInt *local_primal_indices; 4099 PetscErrorCode ierr; 4100 4101 PetscFunctionBegin; 4102 /* Compute global number of coarse dofs */ 4103 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4104 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4105 } 4106 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4107 4108 /* check numbering */ 4109 if (pcbddc->dbg_flag) { 4110 PetscScalar coarsesum,*array; 4111 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4112 4113 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4114 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4115 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4116 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4117 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4118 for (i=0;i<pcbddc->local_primal_size;i++) { 4119 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4120 } 4121 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4122 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4123 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4124 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4125 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4126 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4127 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4128 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4129 for (i=0;i<pcis->n;i++) { 4130 if (array[i] == 1.0) { 4131 set_error = PETSC_TRUE; 4132 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4133 } 4134 } 4135 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4136 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4137 for (i=0;i<pcis->n;i++) { 4138 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4139 } 4140 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4141 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4142 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4143 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4144 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4146 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4147 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4148 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4149 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4150 for (i=0;i<pcbddc->local_primal_size;i++) { 4151 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4152 } 4153 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4154 } 4155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4156 if (set_error_reduced) { 4157 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4158 } 4159 } 4160 /* get back data */ 4161 *coarse_size_n = coarse_size; 4162 *local_primal_indices_n = local_primal_indices; 4163 PetscFunctionReturn(0); 4164 } 4165 4166 #undef __FUNCT__ 4167 #define __FUNCT__ "PCBDDCGlobalToLocal" 4168 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4169 { 4170 IS localis_t; 4171 PetscInt i,lsize,*idxs,n; 4172 PetscScalar *vals; 4173 PetscErrorCode ierr; 4174 4175 PetscFunctionBegin; 4176 /* get indices in local ordering exploiting local to global map */ 4177 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4178 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4179 for (i=0;i<lsize;i++) vals[i] = 1.0; 4180 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4181 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4182 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4183 if (idxs) { /* multilevel guard */ 4184 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4185 } 4186 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4187 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4188 ierr = PetscFree(vals);CHKERRQ(ierr); 4189 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4190 /* now compute set in local ordering */ 4191 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4192 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4193 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4194 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4195 for (i=0,lsize=0;i<n;i++) { 4196 if (PetscRealPart(vals[i]) > 0.5) { 4197 lsize++; 4198 } 4199 } 4200 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4201 for (i=0,lsize=0;i<n;i++) { 4202 if (PetscRealPart(vals[i]) > 0.5) { 4203 idxs[lsize++] = i; 4204 } 4205 } 4206 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4207 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4208 *localis = localis_t; 4209 PetscFunctionReturn(0); 4210 } 4211 4212 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4213 #undef __FUNCT__ 4214 #define __FUNCT__ "PCBDDCMatMult_Private" 4215 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4216 { 4217 PCBDDCChange_ctx change_ctx; 4218 PetscErrorCode ierr; 4219 4220 PetscFunctionBegin; 4221 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4222 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4223 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4224 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4225 PetscFunctionReturn(0); 4226 } 4227 4228 #undef __FUNCT__ 4229 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4230 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4231 { 4232 PCBDDCChange_ctx change_ctx; 4233 PetscErrorCode ierr; 4234 4235 PetscFunctionBegin; 4236 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4237 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4238 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4239 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4240 PetscFunctionReturn(0); 4241 } 4242