1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCSetUpSolvers" 10 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscScalar *coarse_submat_vals; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCResetSolvers" 84 PetscErrorCode PCBDDCResetSolvers(PC pc) 85 { 86 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 87 PetscErrorCode ierr; 88 89 PetscFunctionBegin; 90 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 91 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 111 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 112 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 113 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 114 PetscFunctionReturn(0); 115 } 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 119 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 120 { 121 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 122 PC_IS *pcis = (PC_IS*)pc->data; 123 VecType impVecType; 124 PetscInt n_constraints,n_R,old_size; 125 PetscErrorCode ierr; 126 127 PetscFunctionBegin; 128 if (!pcbddc->ConstraintMatrix) { 129 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 130 } 131 /* get sizes */ 132 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 133 n_R = pcis->n-pcbddc->n_actual_vertices; 134 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 135 /* local work vectors (try to avoid unneeded work)*/ 136 /* R nodes */ 137 old_size = -1; 138 if (pcbddc->vec1_R) { 139 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 140 } 141 if (n_R != old_size) { 142 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 144 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 146 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 147 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 148 } 149 /* local primal dofs */ 150 old_size = -1; 151 if (pcbddc->vec1_P) { 152 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 153 } 154 if (pcbddc->local_primal_size != old_size) { 155 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 157 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 158 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 159 } 160 /* local explicit constraints */ 161 old_size = -1; 162 if (pcbddc->vec1_C) { 163 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 164 } 165 if (n_constraints && n_constraints != old_size) { 166 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 168 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 169 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "PCBDDCSetUpCorrection" 176 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 177 { 178 PetscErrorCode ierr; 179 /* pointers to pcis and pcbddc */ 180 PC_IS* pcis = (PC_IS*)pc->data; 181 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 182 /* submatrices of local problem */ 183 Mat A_RV,A_VR,A_VV; 184 /* working matrices */ 185 Mat M1,M2,M3,C_CR; 186 /* working vectors */ 187 Vec vec1_C,vec2_C,vec1_V,vec2_V; 188 /* additional working stuff */ 189 IS is_aux; 190 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 191 const PetscScalar *array,*row_cmat_values; 192 const PetscInt *row_cmat_indices,*idx_R_local; 193 PetscInt *idx_V_B,*auxindices; 194 PetscInt n_vertices,n_constraints,size_of_constraint; 195 PetscInt i,j,n_R,n_D,n_B; 196 PetscBool unsymmetric_check; 197 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 198 MatType impMatType; 199 /* some shortcuts to scalars */ 200 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 201 /* for debugging purposes */ 202 PetscReal *coarsefunctions_errors,*constraints_errors; 203 204 PetscFunctionBegin; 205 /* get number of vertices (corners plus constraints with change of basis) 206 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 207 n_vertices = pcbddc->n_actual_vertices; 208 n_constraints = pcbddc->local_primal_size-n_vertices; 209 /* Set Non-overlapping dimensions */ 210 n_B = pcis->n_B; n_D = pcis->n - n_B; 211 n_R = pcis->n-n_vertices; 212 213 /* Set types for local objects needed by BDDC precondtioner */ 214 impMatType = MATSEQDENSE; 215 216 /* Allocating some extra storage just to be safe */ 217 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 218 for (i=0;i<pcis->n;i++) auxindices[i]=i; 219 220 /* vertices in boundary numbering */ 221 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 222 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 223 if (i != n_vertices) { 224 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 225 } 226 227 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 228 if (n_constraints) { 229 /* see if we can save some allocations */ 230 if (pcbddc->local_auxmat2) { 231 PetscInt on_R,on_constraints; 232 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 233 if (on_R != n_R || on_constraints != n_constraints) { 234 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 235 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 236 } 237 } 238 /* work vectors */ 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 240 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 241 /* auxiliary matrices */ 242 if (!pcbddc->local_auxmat2) { 243 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 244 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 245 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 246 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 247 } 248 249 /* Extract constraints on R nodes: C_{CR} */ 250 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 251 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 252 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 253 254 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 255 for (i=0;i<n_constraints;i++) { 256 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 257 /* Get row of constraint matrix in R numbering */ 258 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 259 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 260 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 262 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 263 /* Solve for row of constraint matrix in R numbering */ 264 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 265 /* Set values in local_auxmat2 */ 266 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 267 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 268 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 } 270 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 272 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 273 274 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 275 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 276 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 277 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 278 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 279 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 280 ierr = MatSetUp(M1);CHKERRQ(ierr); 281 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 282 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 283 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 284 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 285 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 286 ierr = MatDestroy(&M2);CHKERRQ(ierr); 287 ierr = MatDestroy(&M3);CHKERRQ(ierr); 288 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 289 if (!pcbddc->local_auxmat1) { 290 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 291 } else { 292 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } 294 } 295 296 /* Get submatrices from subdomain matrix */ 297 if (n_vertices) { 298 PetscInt ibs,mbs; 299 PetscBool issbaij; 300 Mat newmat; 301 302 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 303 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 304 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 305 if (ibs != mbs) { /* need to convert to SEQAIJ */ 306 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 310 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 311 } else { 312 /* this is safe */ 313 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 314 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 315 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 316 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 317 /* which of the two approaches is faster? */ 318 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 319 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 320 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 322 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 323 } else { 324 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 325 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 326 } 327 } 328 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 329 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 330 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 331 } 332 333 /* Matrix of coarse basis functions (local) */ 334 if (pcbddc->coarse_phi_B) { 335 PetscInt on_B,on_primal; 336 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 337 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 338 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 339 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 340 } 341 } 342 if (pcbddc->coarse_phi_D) { 343 PetscInt on_D,on_primal; 344 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 345 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 346 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 347 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 348 } 349 } 350 if (!pcbddc->coarse_phi_B) { 351 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 352 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 353 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 354 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 355 } 356 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 357 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 358 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 359 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 360 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 361 } 362 363 if (pcbddc->dbg_flag) { 364 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 365 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 366 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 367 } 368 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 369 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 370 371 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 372 373 /* vertices */ 374 for (i=0;i<n_vertices;i++) { 375 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 376 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 377 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 378 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 379 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 380 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 381 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 382 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 384 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 385 if (n_constraints) { 386 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 387 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 388 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 389 } 390 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 391 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 392 393 /* Set values in coarse basis function and subdomain part of coarse_mat */ 394 /* coarse basis functions */ 395 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 396 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 398 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 400 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 402 if (pcbddc->switch_static || pcbddc->dbg_flag) { 403 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 405 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 406 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 407 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 } 409 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 410 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 if (n_constraints) { 414 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 } 418 419 /* check */ 420 if (pcbddc->dbg_flag) { 421 /* assemble subdomain vector on local nodes */ 422 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 423 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 if (n_R) { 425 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 426 } 427 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 428 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 430 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 431 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 432 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 433 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 if (n_constraints) { 437 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 438 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 } 441 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 442 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 443 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 444 /* check saddle point solution */ 445 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 446 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 447 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 448 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 449 /* shift by the identity matrix */ 450 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 451 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 453 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 454 } 455 } 456 457 /* constraints */ 458 for (i=0;i<n_constraints;i++) { 459 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 460 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 462 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 463 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 464 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 465 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 466 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 467 if (n_vertices) { 468 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 469 } 470 /* Set values in coarse basis function and subdomain part of coarse_mat */ 471 /* coarse basis functions */ 472 j = i+n_vertices; /* don't touch this! */ 473 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 474 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 475 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 477 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 478 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 481 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 482 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 483 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 484 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 485 } 486 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 487 if (n_vertices) { 488 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 491 } 492 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 493 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 494 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 495 496 if (pcbddc->dbg_flag) { 497 /* assemble subdomain vector on nodes */ 498 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 499 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 500 if (n_R) { 501 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 502 } 503 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 504 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 505 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 506 /* assemble subdomain vector of lagrange multipliers */ 507 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 508 if (n_vertices) { 509 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 510 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 511 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 512 } 513 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 514 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 515 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 516 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 517 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 518 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 519 /* check saddle point solution */ 520 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 521 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 522 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 523 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 524 /* shift by the identity matrix */ 525 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 526 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 527 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 528 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 529 } 530 } 531 /* call assembling routines for local coarse basis */ 532 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 533 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 534 if (pcbddc->switch_static || pcbddc->dbg_flag) { 535 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 537 } 538 539 /* compute other basis functions for non-symmetric problems */ 540 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 541 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 542 PetscBool setsym; 543 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 544 if (!setsym) pcbddc->issym = PETSC_FALSE; 545 } 546 547 if (!pcbddc->issym) { 548 if (!pcbddc->coarse_psi_B) { 549 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 550 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 551 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 552 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 553 } 554 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 555 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 556 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 557 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 558 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 559 } 560 for (i=0;i<pcbddc->local_primal_size;i++) { 561 if (n_constraints) { 562 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 563 for (j=0;j<n_constraints;j++) { 564 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 565 } 566 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 567 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 568 } 569 if (i<n_vertices) { 570 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 571 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 572 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 573 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 574 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 575 if (n_constraints) { 576 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 577 } 578 } else { 579 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 580 } 581 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 582 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 583 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 584 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 585 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 586 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 587 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 588 if (i<n_vertices) { 589 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 590 } 591 if (pcbddc->switch_static || pcbddc->dbg_flag) { 592 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 593 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 594 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 595 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 596 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 597 } 598 599 if (pcbddc->dbg_flag) { 600 /* assemble subdomain vector on nodes */ 601 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 602 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 603 if (n_R) { 604 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 605 } 606 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 607 if (i<n_vertices) { 608 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 609 } 610 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 611 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 612 /* assemble subdomain vector of lagrange multipliers */ 613 for (j=0;j<pcbddc->local_primal_size;j++) { 614 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 615 } 616 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 617 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 618 /* check saddle point solution */ 619 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 620 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 621 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 622 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 623 /* shift by the identity matrix */ 624 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 625 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 626 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 627 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 628 } 629 } 630 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 631 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 632 if (pcbddc->switch_static || pcbddc->dbg_flag) { 633 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 634 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 635 } 636 unsymmetric_check = PETSC_TRUE; 637 } else { /* take references to already computed coarse basis */ 638 unsymmetric_check = PETSC_FALSE; 639 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 640 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 641 if (pcbddc->coarse_phi_D) { 642 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 643 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 644 } 645 } 646 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 647 /* Checking coarse_sub_mat and coarse basis functios */ 648 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 649 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 650 if (pcbddc->dbg_flag) { 651 Mat coarse_sub_mat; 652 Mat AUXMAT,TM1,TM2,TM3,TM4; 653 Mat coarse_phi_D,coarse_phi_B; 654 Mat coarse_psi_D,coarse_psi_B; 655 Mat A_II,A_BB,A_IB,A_BI; 656 MatType checkmattype=MATSEQAIJ; 657 PetscReal real_value; 658 659 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 660 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 661 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 662 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 663 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 664 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 665 if (unsymmetric_check) { 666 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 667 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 668 } 669 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 670 671 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 672 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 673 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 674 if (unsymmetric_check) { 675 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 676 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 677 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 678 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 679 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 680 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 681 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 682 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 683 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 684 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 685 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 686 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 687 } else { 688 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 689 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 690 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 691 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 692 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 693 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 694 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 695 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 696 } 697 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 698 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 699 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 700 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 701 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 702 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 703 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 708 for (i=0;i<pcbddc->local_primal_size;i++) { 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 710 } 711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 712 for (i=0;i<pcbddc->local_primal_size;i++) { 713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 714 } 715 if (unsymmetric_check) { 716 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 717 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 719 } 720 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 721 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 722 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 723 } 724 } 725 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 726 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 727 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 730 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 731 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 732 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 733 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 734 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 735 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 736 if (unsymmetric_check) { 737 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 738 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 739 } 740 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 741 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 742 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 743 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 744 } 745 /* free memory */ 746 if (n_vertices) { 747 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 748 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 749 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 750 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 751 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 752 } 753 if (n_constraints) { 754 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 755 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 756 ierr = MatDestroy(&M1);CHKERRQ(ierr); 757 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 758 } 759 ierr = PetscFree(auxindices);CHKERRQ(ierr); 760 /* get back data */ 761 *coarse_submat_vals_n = coarse_submat_vals; 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 767 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 768 { 769 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 770 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 771 Mat *change_mat_all; 772 IS is_local,is_global; 773 PetscBool sorted,isseqaij; 774 PetscInt local_size,*idxs_perm; 775 PetscErrorCode ierr; 776 777 PetscFunctionBegin; 778 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 779 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 780 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 781 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 782 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 783 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 784 if (!sorted) { 785 const PetscInt *idxs; 786 PetscInt *idxs_sorted,i; 787 788 ierr = PetscMalloc1(local_size,&idxs_perm);CHKERRQ(ierr); 789 ierr = PetscMalloc1(local_size,&idxs_sorted);CHKERRQ(ierr); 790 for (i=0;i<local_size;i++) { 791 idxs_perm[i] = i; 792 } 793 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 794 ierr = PetscSortIntWithPermutation(local_size,idxs,idxs_perm);CHKERRQ(ierr); 795 for (i=0;i<local_size;i++) { 796 idxs_sorted[i] = idxs[idxs_perm[i]]; 797 } 798 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 799 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 800 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 801 } 802 803 /* get change of basis on the whole set of local dofs */ 804 ierr = MatGetSubMatrices(ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 805 806 if (!sorted) { 807 Mat new_mat; 808 IS is_perm; 809 PetscInt *idxs,i; 810 811 ierr = PetscMalloc1(local_size,&idxs);CHKERRQ(ierr); 812 for (i=0;i<local_size;i++) { 813 idxs[idxs_perm[i]] = i; 814 } 815 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 816 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 817 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 818 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 819 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 820 change_mat_all[0] = new_mat; 821 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 822 } 823 824 /* check */ 825 if (pcbddc->dbg_flag) { 826 Vec x,x_change; 827 PetscReal error; 828 829 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 830 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 831 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 832 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 833 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = MatMult(change_mat_all[0],matis->x,matis->y);CHKERRQ(ierr); 835 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 836 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 837 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 838 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 839 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 840 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 841 ierr = VecDestroy(&x);CHKERRQ(ierr); 842 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 843 } 844 845 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 846 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 847 if (isseqaij) { 848 ierr = MatPtAP(matis->A,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 849 } else { 850 Mat work_mat; 851 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 852 ierr = MatPtAP(work_mat,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 853 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 854 } 855 /* 856 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 857 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 858 */ 859 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 860 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 866 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 867 { 868 PC_IS* pcis = (PC_IS*)(pc->data); 869 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 870 IS is_aux1,is_aux2; 871 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 872 PetscInt n_vertices,i,j,n_R,n_D,n_B; 873 PetscInt vbs,bs; 874 PetscBT bitmask; 875 PetscErrorCode ierr; 876 877 PetscFunctionBegin; 878 /* 879 No need to setup local scatters if 880 - primal space is unchanged 881 AND 882 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 883 AND 884 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 885 */ 886 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 887 PetscFunctionReturn(0); 888 } 889 /* destroy old objects */ 890 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 891 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 892 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 893 /* Set Non-overlapping dimensions */ 894 n_B = pcis->n_B; n_D = pcis->n - n_B; 895 n_vertices = pcbddc->n_actual_vertices; 896 /* create auxiliary bitmask */ 897 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 898 for (i=0;i<n_vertices;i++) { 899 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 900 } 901 902 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 903 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 904 for (i=0, n_R=0; i<pcis->n; i++) { 905 if (!PetscBTLookup(bitmask,i)) { 906 idx_R_local[n_R] = i; 907 n_R++; 908 } 909 } 910 911 /* Block code */ 912 vbs = 1; 913 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 914 if (bs>1 && !(n_vertices%bs)) { 915 PetscBool is_blocked = PETSC_TRUE; 916 PetscInt *vary; 917 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 918 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 919 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 920 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 921 for (i=0; i<n_vertices; i++) { 922 if (vary[i]!=0 && vary[i]!=bs) { 923 is_blocked = PETSC_FALSE; 924 break; 925 } 926 } 927 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 928 vbs = bs; 929 for (i=0;i<n_R/vbs;i++) { 930 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 931 } 932 } 933 ierr = PetscFree(vary);CHKERRQ(ierr); 934 } 935 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 936 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 937 938 /* print some info if requested */ 939 if (pcbddc->dbg_flag) { 940 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 941 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 942 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 943 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 944 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 947 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 948 } 949 950 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 951 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 952 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 953 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 954 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 955 for (i=0; i<n_D; i++) { 956 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 957 } 958 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 959 for (i=0, j=0; i<n_R; i++) { 960 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 961 aux_array1[j++] = i; 962 } 963 } 964 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 965 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 966 for (i=0, j=0; i<n_B; i++) { 967 if (!PetscBTLookup(bitmask,is_indices[i])) { 968 aux_array2[j++] = i; 969 } 970 } 971 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 972 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 973 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 974 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 975 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 976 977 if (pcbddc->switch_static || pcbddc->dbg_flag) { 978 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 979 for (i=0, j=0; i<n_R; i++) { 980 if (PetscBTLookup(bitmask,idx_R_local[i])) { 981 aux_array1[j++] = i; 982 } 983 } 984 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 985 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 986 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 987 } 988 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 989 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 996 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 997 { 998 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 999 PC_IS *pcis = (PC_IS*)pc->data; 1000 PC pc_temp; 1001 Mat A_RR; 1002 MatReuse reuse; 1003 PetscScalar m_one = -1.0; 1004 PetscReal value; 1005 PetscInt n_D,n_R,ibs,mbs; 1006 PetscBool use_exact,use_exact_reduced,issbaij; 1007 PetscErrorCode ierr; 1008 /* prefixes stuff */ 1009 char dir_prefix[256],neu_prefix[256],str_level[16]; 1010 size_t len; 1011 1012 PetscFunctionBegin; 1013 1014 /* compute prefixes */ 1015 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1016 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1017 if (!pcbddc->current_level) { 1018 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1019 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1020 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1021 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1022 } else { 1023 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1024 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1025 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1026 len -= 15; /* remove "pc_bddc_coarse_" */ 1027 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1028 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1029 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1030 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1031 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1032 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1033 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1034 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1035 } 1036 1037 /* DIRICHLET PROBLEM */ 1038 /* Matrix for Dirichlet problem is pcis->A_II */ 1039 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1040 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1041 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1042 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1043 /* default */ 1044 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1045 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1046 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1047 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1048 if (issbaij) { 1049 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1050 } else { 1051 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1052 } 1053 /* Allow user's customization */ 1054 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1055 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1056 } 1057 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1058 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1059 if (!n_D) { 1060 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1061 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1062 } 1063 /* Set Up KSP for Dirichlet problem of BDDC */ 1064 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1065 /* set ksp_D into pcis data */ 1066 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1067 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1068 pcis->ksp_D = pcbddc->ksp_D; 1069 1070 /* NEUMANN PROBLEM */ 1071 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1072 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1073 if (pcbddc->ksp_R) { /* already created ksp */ 1074 PetscInt nn_R; 1075 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1076 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1077 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1078 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1079 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1080 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1081 reuse = MAT_INITIAL_MATRIX; 1082 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1083 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1084 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1085 reuse = MAT_INITIAL_MATRIX; 1086 } else { /* safe to reuse the matrix */ 1087 reuse = MAT_REUSE_MATRIX; 1088 } 1089 } 1090 /* last check */ 1091 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1092 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1093 reuse = MAT_INITIAL_MATRIX; 1094 } 1095 } else { /* first time, so we need to create the matrix */ 1096 reuse = MAT_INITIAL_MATRIX; 1097 } 1098 /* extract A_RR */ 1099 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1100 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1101 if (ibs != mbs) { 1102 Mat newmat; 1103 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1104 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1105 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1106 } else { 1107 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1108 } 1109 if (!pcbddc->ksp_R) { /* create object if not present */ 1110 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1111 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1112 /* default */ 1113 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1114 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1115 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1116 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1117 if (issbaij) { 1118 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1119 } else { 1120 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1121 } 1122 /* Allow user's customization */ 1123 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1124 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1125 } 1126 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1127 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1128 if (!n_R) { 1129 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1130 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1131 } 1132 /* Set Up KSP for Neumann problem of BDDC */ 1133 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1134 1135 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1136 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1137 /* Dirichlet */ 1138 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1139 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1140 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1141 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1142 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1143 /* need to be adapted? */ 1144 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1145 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1146 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1147 /* print info */ 1148 if (pcbddc->dbg_flag) { 1149 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1150 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1152 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1153 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1154 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1155 } 1156 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1157 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1158 } 1159 1160 /* Neumann */ 1161 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1162 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1163 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1164 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1165 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1166 /* need to be adapted? */ 1167 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1168 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1169 /* print info */ 1170 if (pcbddc->dbg_flag) { 1171 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1172 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1173 } 1174 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1175 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1176 } 1177 } 1178 /* free Neumann problem's matrix */ 1179 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1185 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1186 { 1187 PetscErrorCode ierr; 1188 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1189 1190 PetscFunctionBegin; 1191 if (applytranspose) { 1192 if (pcbddc->local_auxmat1) { 1193 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1194 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1195 } 1196 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1197 } else { 1198 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1199 if (pcbddc->local_auxmat1) { 1200 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1201 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1202 } 1203 } 1204 PetscFunctionReturn(0); 1205 } 1206 1207 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1208 #undef __FUNCT__ 1209 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1210 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1211 { 1212 PetscErrorCode ierr; 1213 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1214 PC_IS* pcis = (PC_IS*) (pc->data); 1215 const PetscScalar zero = 0.0; 1216 1217 PetscFunctionBegin; 1218 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1219 if (applytranspose) { 1220 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1221 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1222 } else { 1223 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1224 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1225 } 1226 /* start communications from local primal nodes to rhs of coarse solver */ 1227 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1228 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 1231 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1232 /* TODO remove null space when doing multilevel */ 1233 if (pcbddc->coarse_ksp) { 1234 if (applytranspose) { 1235 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1236 } else { 1237 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1238 } 1239 } 1240 1241 /* Local solution on R nodes */ 1242 if (pcis->n) { 1243 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1244 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1245 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1246 if (pcbddc->switch_static) { 1247 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1248 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1249 } 1250 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1251 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1252 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1254 if (pcbddc->switch_static) { 1255 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1256 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1257 } 1258 } 1259 1260 /* communications from coarse sol to local primal nodes */ 1261 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1262 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1263 1264 /* Sum contributions from two levels */ 1265 if (applytranspose) { 1266 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1267 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1268 } else { 1269 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1270 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1271 } 1272 PetscFunctionReturn(0); 1273 } 1274 1275 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1278 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1279 { 1280 PetscErrorCode ierr; 1281 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1282 PetscScalar *array,*array2; 1283 Vec from,to; 1284 1285 PetscFunctionBegin; 1286 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1287 from = pcbddc->coarse_vec; 1288 to = pcbddc->vec1_P; 1289 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1290 Vec tvec; 1291 PetscInt lsize; 1292 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1293 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1294 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1295 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1296 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1297 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1298 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1299 } 1300 } else { /* from local to global -> put data in coarse right hand side */ 1301 from = pcbddc->vec1_P; 1302 to = pcbddc->coarse_vec; 1303 } 1304 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1310 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1311 { 1312 PetscErrorCode ierr; 1313 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1314 PetscScalar *array,*array2; 1315 Vec from,to; 1316 1317 PetscFunctionBegin; 1318 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1319 from = pcbddc->coarse_vec; 1320 to = pcbddc->vec1_P; 1321 } else { /* from local to global -> put data in coarse right hand side */ 1322 from = pcbddc->vec1_P; 1323 to = pcbddc->coarse_vec; 1324 } 1325 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1326 if (smode == SCATTER_FORWARD) { 1327 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1328 Vec tvec; 1329 PetscInt lsize; 1330 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1331 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1332 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1333 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1334 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1335 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1336 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1337 } 1338 } 1339 PetscFunctionReturn(0); 1340 } 1341 1342 /* uncomment for testing purposes */ 1343 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1346 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1347 { 1348 PetscErrorCode ierr; 1349 PC_IS* pcis = (PC_IS*)(pc->data); 1350 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1351 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1352 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1353 MatType impMatType=MATSEQAIJ; 1354 /* one and zero */ 1355 PetscScalar one=1.0,zero=0.0; 1356 /* space to store constraints and their local indices */ 1357 PetscScalar *temp_quadrature_constraint; 1358 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1359 /* iterators */ 1360 PetscInt i,j,k,total_counts,temp_start_ptr; 1361 /* stuff to store connected components stored in pcbddc->mat_graph */ 1362 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1363 PetscInt n_ISForFaces,n_ISForEdges; 1364 /* near null space stuff */ 1365 MatNullSpace nearnullsp; 1366 const Vec *nearnullvecs; 1367 Vec *localnearnullsp; 1368 PetscBool nnsp_has_cnst; 1369 PetscInt nnsp_size; 1370 PetscScalar *array; 1371 /* BLAS integers */ 1372 PetscBLASInt lwork,lierr; 1373 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1374 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1375 /* LAPACK working arrays for SVD or POD */ 1376 PetscBool skip_lapack; 1377 PetscScalar *work; 1378 PetscReal *singular_vals; 1379 #if defined(PETSC_USE_COMPLEX) 1380 PetscReal *rwork; 1381 #endif 1382 #if defined(PETSC_MISSING_LAPACK_GESVD) 1383 PetscBLASInt Blas_one_2=1; 1384 PetscScalar *temp_basis,*correlation_mat; 1385 #else 1386 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1387 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1388 #endif 1389 /* reuse */ 1390 PetscInt olocal_primal_size; 1391 PetscInt *oprimal_indices_local_idxs; 1392 /* change of basis */ 1393 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1394 PetscBool boolforchange,qr_needed; 1395 PetscBT touched,change_basis,qr_needed_idx; 1396 /* auxiliary stuff */ 1397 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1398 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1399 PetscScalar *temp_quadrature_constraint_work; 1400 /* some quantities */ 1401 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1402 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1403 1404 1405 PetscFunctionBegin; 1406 /* Destroy Mat objects computed previously */ 1407 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1408 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1409 /* Get index sets for faces, edges and vertices from graph */ 1410 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1411 /* free unneeded index sets */ 1412 if (!pcbddc->use_vertices) { 1413 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1414 } 1415 if (!pcbddc->use_edges) { 1416 for (i=0;i<n_ISForEdges;i++) { 1417 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1418 } 1419 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1420 n_ISForEdges = 0; 1421 } 1422 if (!pcbddc->use_faces) { 1423 for (i=0;i<n_ISForFaces;i++) { 1424 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1427 n_ISForFaces = 0; 1428 } 1429 /* HACKS (the following two blocks of code) */ 1430 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1431 pcbddc->use_change_of_basis = PETSC_TRUE; 1432 if (!ISForEdges) { 1433 pcbddc->use_change_on_faces = PETSC_TRUE; 1434 } 1435 } 1436 if (pcbddc->NullSpace) { 1437 /* use_change_of_basis should be consistent among processors */ 1438 PetscBool tbool[2],gbool[2]; 1439 tbool [0] = pcbddc->use_change_of_basis; 1440 tbool [1] = pcbddc->use_change_on_faces; 1441 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1442 pcbddc->use_change_of_basis = gbool[0]; 1443 pcbddc->use_change_on_faces = gbool[1]; 1444 } 1445 /* print some info */ 1446 if (pcbddc->dbg_flag) { 1447 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1448 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1449 i = 0; 1450 if (ISForVertices) { 1451 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1452 } 1453 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1454 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1455 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1456 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1457 } 1458 /* check if near null space is attached to global mat */ 1459 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1460 if (nearnullsp) { 1461 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1462 /* remove any stored info */ 1463 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1464 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1465 /* store information for BDDC solver reuse */ 1466 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1467 pcbddc->onearnullspace = nearnullsp; 1468 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1469 for (i=0;i<nnsp_size;i++) { 1470 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1471 } 1472 } else { /* if near null space is not provided BDDC uses constants by default */ 1473 nnsp_size = 0; 1474 nnsp_has_cnst = PETSC_TRUE; 1475 } 1476 /* get max number of constraints on a single cc */ 1477 max_constraints = nnsp_size; 1478 if (nnsp_has_cnst) max_constraints++; 1479 1480 /* 1481 Evaluate maximum storage size needed by the procedure 1482 - temp_indices will contain start index of each constraint stored as follows 1483 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1484 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1485 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1486 */ 1487 total_counts = n_ISForFaces+n_ISForEdges; 1488 total_counts *= max_constraints; 1489 n_vertices = 0; 1490 if (ISForVertices) { 1491 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1492 } 1493 total_counts += n_vertices; 1494 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1495 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1496 total_counts = 0; 1497 max_size_of_constraint = 0; 1498 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1499 if (i<n_ISForEdges) { 1500 used_IS = &ISForEdges[i]; 1501 } else { 1502 used_IS = &ISForFaces[i-n_ISForEdges]; 1503 } 1504 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1505 total_counts += j; 1506 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1507 } 1508 total_counts *= max_constraints; 1509 total_counts += n_vertices; 1510 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1511 /* get local part of global near null space vectors */ 1512 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1513 for (k=0;k<nnsp_size;k++) { 1514 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1515 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1516 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1517 } 1518 1519 /* whether or not to skip lapack calls */ 1520 skip_lapack = PETSC_TRUE; 1521 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1522 1523 /* allocate some auxiliary stuff */ 1524 if (!skip_lapack || pcbddc->use_qr_single) { 1525 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1526 } 1527 1528 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1529 if (!skip_lapack) { 1530 PetscScalar temp_work; 1531 1532 #if defined(PETSC_MISSING_LAPACK_GESVD) 1533 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1534 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1535 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1536 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1537 #if defined(PETSC_USE_COMPLEX) 1538 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1539 #endif 1540 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1541 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1542 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1543 lwork = -1; 1544 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1545 #if !defined(PETSC_USE_COMPLEX) 1546 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1547 #else 1548 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1549 #endif 1550 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1551 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1552 #else /* on missing GESVD */ 1553 /* SVD */ 1554 PetscInt max_n,min_n; 1555 max_n = max_size_of_constraint; 1556 min_n = max_constraints; 1557 if (max_size_of_constraint < max_constraints) { 1558 min_n = max_size_of_constraint; 1559 max_n = max_constraints; 1560 } 1561 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1562 #if defined(PETSC_USE_COMPLEX) 1563 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1564 #endif 1565 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1566 lwork = -1; 1567 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1568 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1569 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1570 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1571 #if !defined(PETSC_USE_COMPLEX) 1572 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1573 #else 1574 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1575 #endif 1576 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1577 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1578 #endif /* on missing GESVD */ 1579 /* Allocate optimal workspace */ 1580 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1581 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1582 } 1583 /* Now we can loop on constraining sets */ 1584 total_counts = 0; 1585 temp_indices[0] = 0; 1586 /* vertices */ 1587 if (ISForVertices) { 1588 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1589 if (nnsp_has_cnst) { /* consider all vertices */ 1590 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1591 for (i=0;i<n_vertices;i++) { 1592 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1593 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1594 total_counts++; 1595 } 1596 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1597 PetscBool used_vertex; 1598 for (i=0;i<n_vertices;i++) { 1599 used_vertex = PETSC_FALSE; 1600 k = 0; 1601 while (!used_vertex && k<nnsp_size) { 1602 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1603 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1604 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1605 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1606 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1607 total_counts++; 1608 used_vertex = PETSC_TRUE; 1609 } 1610 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1611 k++; 1612 } 1613 } 1614 } 1615 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1616 n_vertices = total_counts; 1617 } 1618 1619 /* edges and faces */ 1620 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1621 if (ncc<n_ISForEdges) { 1622 used_IS = &ISForEdges[ncc]; 1623 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1624 } else { 1625 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1626 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1627 } 1628 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1629 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1630 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1631 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1632 /* change of basis should not be performed on local periodic nodes */ 1633 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1634 if (nnsp_has_cnst) { 1635 PetscScalar quad_value; 1636 temp_constraints++; 1637 if (!pcbddc->use_nnsp_true) { 1638 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1639 } else { 1640 quad_value = 1.0; 1641 } 1642 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1643 for (j=0;j<size_of_constraint;j++) { 1644 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1645 } 1646 /* sort by global ordering if using lapack subroutines */ 1647 if (!skip_lapack || pcbddc->use_qr_single) { 1648 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1649 for (j=0;j<size_of_constraint;j++) { 1650 permutation[j]=j; 1651 } 1652 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1653 for (j=0;j<size_of_constraint;j++) { 1654 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1655 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1656 } 1657 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1658 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1659 } 1660 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1661 total_counts++; 1662 } 1663 for (k=0;k<nnsp_size;k++) { 1664 PetscReal real_value; 1665 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1666 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1667 for (j=0;j<size_of_constraint;j++) { 1668 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1669 } 1670 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1671 /* check if array is null on the connected component */ 1672 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1673 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1674 if (real_value > 0.0) { /* keep indices and values */ 1675 /* sort by global ordering if using lapack subroutines */ 1676 if (!skip_lapack || pcbddc->use_qr_single) { 1677 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1678 for (j=0;j<size_of_constraint;j++) { 1679 permutation[j]=j; 1680 } 1681 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1682 for (j=0;j<size_of_constraint;j++) { 1683 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1684 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1685 } 1686 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1687 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1688 } 1689 temp_constraints++; 1690 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1691 total_counts++; 1692 } 1693 } 1694 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1695 valid_constraints = temp_constraints; 1696 if (!pcbddc->use_nnsp_true && temp_constraints) { 1697 if (temp_constraints == 1) { /* just normalize the constraint */ 1698 PetscScalar norm; 1699 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1700 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1701 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1702 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1703 } else { /* perform SVD */ 1704 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1705 1706 #if defined(PETSC_MISSING_LAPACK_GESVD) 1707 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1708 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1709 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1710 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1711 from that computed using LAPACKgesvd 1712 -> This is due to a different computation of eigenvectors in LAPACKheev 1713 -> The quality of the POD-computed basis will be the same */ 1714 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1715 /* Store upper triangular part of correlation matrix */ 1716 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1717 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1718 for (j=0;j<temp_constraints;j++) { 1719 for (k=0;k<j+1;k++) { 1720 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1721 } 1722 } 1723 /* compute eigenvalues and eigenvectors of correlation matrix */ 1724 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1725 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1726 #if !defined(PETSC_USE_COMPLEX) 1727 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1728 #else 1729 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1730 #endif 1731 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1732 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1733 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1734 j = 0; 1735 while (j < temp_constraints && singular_vals[j] < tol) j++; 1736 total_counts = total_counts-j; 1737 valid_constraints = temp_constraints-j; 1738 /* scale and copy POD basis into used quadrature memory */ 1739 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1740 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1741 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1742 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1743 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1744 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1745 if (j<temp_constraints) { 1746 PetscInt ii; 1747 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1748 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1749 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1750 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1751 for (k=0;k<temp_constraints-j;k++) { 1752 for (ii=0;ii<size_of_constraint;ii++) { 1753 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1754 } 1755 } 1756 } 1757 #else /* on missing GESVD */ 1758 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1759 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1760 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1761 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1762 #if !defined(PETSC_USE_COMPLEX) 1763 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1764 #else 1765 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1766 #endif 1767 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1768 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1769 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1770 k = temp_constraints; 1771 if (k > size_of_constraint) k = size_of_constraint; 1772 j = 0; 1773 while (j < k && singular_vals[k-j-1] < tol) j++; 1774 valid_constraints = k-j; 1775 total_counts = total_counts-temp_constraints+valid_constraints; 1776 #endif /* on missing GESVD */ 1777 } 1778 } 1779 /* setting change_of_basis flag is safe now */ 1780 if (boolforchange) { 1781 for (j=0;j<valid_constraints;j++) { 1782 PetscBTSet(change_basis,total_counts-j-1); 1783 } 1784 } 1785 } 1786 /* free index sets of faces, edges and vertices */ 1787 for (i=0;i<n_ISForFaces;i++) { 1788 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1789 } 1790 if (n_ISForFaces) { 1791 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1792 } 1793 for (i=0;i<n_ISForEdges;i++) { 1794 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1795 } 1796 if (n_ISForEdges) { 1797 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1798 } 1799 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1800 /* map temp_indices_to_constraint in boundary numbering */ 1801 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1802 if (i != temp_indices[total_counts]) { 1803 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1804 } 1805 1806 /* free workspace */ 1807 if (!skip_lapack || pcbddc->use_qr_single) { 1808 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 1809 } 1810 if (!skip_lapack) { 1811 ierr = PetscFree(work);CHKERRQ(ierr); 1812 #if defined(PETSC_USE_COMPLEX) 1813 ierr = PetscFree(rwork);CHKERRQ(ierr); 1814 #endif 1815 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1816 #if defined(PETSC_MISSING_LAPACK_GESVD) 1817 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1818 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1819 #endif 1820 } 1821 for (k=0;k<nnsp_size;k++) { 1822 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1823 } 1824 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1825 1826 /* set quantities in pcbddc data structure and store previous primal size */ 1827 /* n_vertices defines the number of subdomain corners in the primal space */ 1828 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1829 olocal_primal_size = pcbddc->local_primal_size; 1830 pcbddc->local_primal_size = total_counts; 1831 pcbddc->n_vertices = n_vertices; 1832 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1833 1834 /* Create constraint matrix */ 1835 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1836 /* so we need to set it up properly either with or without change of basis */ 1837 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1838 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1839 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1840 /* array to compute a local numbering of constraints : vertices first then constraints */ 1841 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1842 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1843 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1844 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1845 /* auxiliary stuff for basis change */ 1846 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1847 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1848 1849 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1850 total_primal_vertices=0; 1851 for (i=0;i<pcbddc->local_primal_size;i++) { 1852 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1853 if (size_of_constraint == 1) { 1854 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1855 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1856 aux_primal_minloc[total_primal_vertices]=0; 1857 total_primal_vertices++; 1858 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1859 PetscInt min_loc,min_index; 1860 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1861 /* find first untouched local node */ 1862 k = 0; 1863 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1864 min_index = global_indices[k]; 1865 min_loc = k; 1866 /* search the minimum among global nodes already untouched on the cc */ 1867 for (k=1;k<size_of_constraint;k++) { 1868 /* there can be more than one constraint on a single connected component */ 1869 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1870 min_index = global_indices[k]; 1871 min_loc = k; 1872 } 1873 } 1874 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1875 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1876 aux_primal_minloc[total_primal_vertices]=min_loc; 1877 total_primal_vertices++; 1878 } 1879 } 1880 /* determine if a QR strategy is needed for change of basis */ 1881 qr_needed = PETSC_FALSE; 1882 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1883 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1884 if (PetscBTLookup(change_basis,i)) { 1885 if (!pcbddc->use_qr_single) { 1886 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1887 j = 0; 1888 for (k=0;k<size_of_constraint;k++) { 1889 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1890 j++; 1891 } 1892 } 1893 /* found more than one primal dof on the cc */ 1894 if (j > 1) { 1895 PetscBTSet(qr_needed_idx,i); 1896 qr_needed = PETSC_TRUE; 1897 } 1898 } else { 1899 PetscBTSet(qr_needed_idx,i); 1900 qr_needed = PETSC_TRUE; 1901 } 1902 } 1903 } 1904 /* free workspace */ 1905 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1906 1907 /* permute indices in order to have a sorted set of vertices */ 1908 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1909 1910 /* nonzero structure of constraint matrix */ 1911 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1912 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1913 j=total_primal_vertices; 1914 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1915 if (!PetscBTLookup(change_basis,i)) { 1916 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1917 j++; 1918 } 1919 } 1920 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1921 ierr = PetscFree(nnz);CHKERRQ(ierr); 1922 /* set values in constraint matrix */ 1923 for (i=0;i<total_primal_vertices;i++) { 1924 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1925 } 1926 total_counts = total_primal_vertices; 1927 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1928 if (!PetscBTLookup(change_basis,i)) { 1929 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1930 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1931 total_counts++; 1932 } 1933 } 1934 /* assembling */ 1935 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1936 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1937 /* 1938 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1939 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1940 */ 1941 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1942 if (pcbddc->use_change_of_basis) { 1943 /* dual and primal dofs on a single cc */ 1944 PetscInt dual_dofs,primal_dofs; 1945 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1946 PetscInt primal_counter; 1947 /* working stuff for GEQRF */ 1948 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1949 PetscBLASInt lqr_work; 1950 /* working stuff for UNGQR */ 1951 PetscScalar *gqr_work,lgqr_work_t; 1952 PetscBLASInt lgqr_work; 1953 /* working stuff for TRTRS */ 1954 PetscScalar *trs_rhs; 1955 PetscBLASInt Blas_NRHS; 1956 /* pointers for values insertion into change of basis matrix */ 1957 PetscInt *start_rows,*start_cols; 1958 PetscScalar *start_vals; 1959 /* working stuff for values insertion */ 1960 PetscBT is_primal; 1961 /* matrix sizes */ 1962 PetscInt global_size,local_size; 1963 /* work array for nonzeros */ 1964 PetscScalar *nnz_array; 1965 /* temporary change of basis */ 1966 Mat localChangeOfBasisMatrix; 1967 /* auxiliary work for global change of basis */ 1968 Vec nnz_vec; 1969 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 1970 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 1971 PetscScalar *vals; 1972 PetscBool done; 1973 1974 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 1975 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 1976 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1977 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1978 1979 /* nonzeros for local mat */ 1980 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1981 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1982 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1983 if (PetscBTLookup(change_basis,i)) { 1984 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1985 if (PetscBTLookup(qr_needed_idx,i)) { 1986 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1987 } else { 1988 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1989 /* get local primal index on the cc */ 1990 j = 0; 1991 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1992 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1993 } 1994 } 1995 } 1996 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1997 /* Set initial identity in the matrix */ 1998 for (i=0;i<pcis->n_B;i++) { 1999 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2000 } 2001 2002 if (pcbddc->dbg_flag) { 2003 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2004 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2005 } 2006 2007 2008 /* Now we loop on the constraints which need a change of basis */ 2009 /* 2010 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2011 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2012 2013 Basic blocks of change of basis matrix T computed by 2014 2015 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2016 2017 | 1 0 ... 0 s_1/S | 2018 | 0 1 ... 0 s_2/S | 2019 | ... | 2020 | 0 ... 1 s_{n-1}/S | 2021 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2022 2023 with S = \sum_{i=1}^n s_i^2 2024 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2025 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2026 2027 - QR decomposition of constraints otherwise 2028 */ 2029 if (qr_needed) { 2030 /* space to store Q */ 2031 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2032 /* first we issue queries for optimal work */ 2033 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2034 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2035 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2036 lqr_work = -1; 2037 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2038 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2039 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2040 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2041 lgqr_work = -1; 2042 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2043 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2044 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2045 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2046 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2047 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2048 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2049 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2050 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2051 /* array to store scaling factors for reflectors */ 2052 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2053 /* array to store rhs and solution of triangular solver */ 2054 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2055 /* allocating workspace for check */ 2056 if (pcbddc->dbg_flag) { 2057 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2058 } 2059 } 2060 /* array to store whether a node is primal or not */ 2061 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2062 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2063 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2064 if (i != total_primal_vertices) { 2065 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2066 } 2067 for (i=0;i<total_primal_vertices;i++) { 2068 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2069 } 2070 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2071 2072 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2073 /* -> using implicit ordering contained in temp_indices data */ 2074 total_counts = pcbddc->n_vertices; 2075 primal_counter = total_counts; 2076 while (total_counts<pcbddc->local_primal_size) { 2077 primal_dofs = 1; 2078 if (PetscBTLookup(change_basis,total_counts)) { 2079 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2080 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2081 primal_dofs++; 2082 } 2083 /* get constraint info */ 2084 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2085 dual_dofs = size_of_constraint-primal_dofs; 2086 2087 if (pcbddc->dbg_flag) { 2088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2089 } 2090 2091 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2092 2093 /* copy quadrature constraints for change of basis check */ 2094 if (pcbddc->dbg_flag) { 2095 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2096 } 2097 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2098 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2099 2100 /* compute QR decomposition of constraints */ 2101 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2102 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2103 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2104 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2105 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2106 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2107 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2108 2109 /* explictly compute R^-T */ 2110 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2111 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2112 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2113 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2114 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2115 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2116 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2117 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2118 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2119 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2120 2121 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2122 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2124 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2126 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2127 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2128 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2129 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2130 2131 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2132 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2133 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2134 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2135 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2136 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2137 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2138 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2139 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2140 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2141 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2142 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2143 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2144 2145 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2146 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2147 /* insert cols for primal dofs */ 2148 for (j=0;j<primal_dofs;j++) { 2149 start_vals = &qr_basis[j*size_of_constraint]; 2150 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2151 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2152 } 2153 /* insert cols for dual dofs */ 2154 for (j=0,k=0;j<dual_dofs;k++) { 2155 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2156 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2157 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2158 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2159 j++; 2160 } 2161 } 2162 2163 /* check change of basis */ 2164 if (pcbddc->dbg_flag) { 2165 PetscInt ii,jj; 2166 PetscBool valid_qr=PETSC_TRUE; 2167 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2168 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2169 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2170 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2171 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2172 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2173 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2174 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2175 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2176 for (jj=0;jj<size_of_constraint;jj++) { 2177 for (ii=0;ii<primal_dofs;ii++) { 2178 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2179 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2180 } 2181 } 2182 if (!valid_qr) { 2183 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2184 for (jj=0;jj<size_of_constraint;jj++) { 2185 for (ii=0;ii<primal_dofs;ii++) { 2186 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2187 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2188 } 2189 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2190 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2191 } 2192 } 2193 } 2194 } else { 2195 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2196 } 2197 } 2198 } else { /* simple transformation block */ 2199 PetscInt row,col; 2200 PetscScalar val,norm; 2201 2202 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2203 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2204 for (j=0;j<size_of_constraint;j++) { 2205 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2206 if (!PetscBTLookup(is_primal,row)) { 2207 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2208 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2209 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2210 } else { 2211 for (k=0;k<size_of_constraint;k++) { 2212 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2213 if (row != col) { 2214 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2215 } else { 2216 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2217 } 2218 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2219 } 2220 } 2221 } 2222 if (pcbddc->dbg_flag) { 2223 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2224 } 2225 } 2226 /* increment primal counter */ 2227 primal_counter += primal_dofs; 2228 } else { 2229 if (pcbddc->dbg_flag) { 2230 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2231 } 2232 } 2233 /* increment constraint counter total_counts */ 2234 total_counts += primal_dofs; 2235 } 2236 2237 /* free workspace */ 2238 if (qr_needed) { 2239 if (pcbddc->dbg_flag) { 2240 ierr = PetscFree(work);CHKERRQ(ierr); 2241 } 2242 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2243 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2244 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2245 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2246 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2247 } 2248 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2249 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2250 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2251 2252 /* assembling of global change of variable */ 2253 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2254 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2255 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2256 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2257 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2258 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2259 2260 /* nonzeros (overestimated) */ 2261 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2262 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2263 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2264 for (i=0;i<pcis->n;i++) { 2265 nnz_array[i] = 1.0; 2266 idxs_all[i] = i; 2267 } 2268 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2269 for (i=0;i<pcis->n_B;i++) { 2270 nnz_array[idxs_B[i]] = nnz[i]; 2271 } 2272 if (pcis->n) { 2273 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2274 } 2275 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2276 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2277 ierr = PetscFree(nnz);CHKERRQ(ierr); 2278 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2279 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2280 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2281 for (i=0;i<local_size;i++) { 2282 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2283 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2284 } 2285 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2286 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2287 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2288 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2289 2290 /* Set identity on dirichlet dofs */ 2291 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2292 for (i=0;i<pcis->n-pcis->n_B;i++) { 2293 PetscScalar one=1.0; 2294 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2295 } 2296 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2297 2298 /* Set values at interface dofs */ 2299 done = PETSC_TRUE; 2300 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2301 if (!done) { 2302 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2303 } 2304 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2305 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2306 ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2307 for (i=0;i<nvtxs;i++) { 2308 PetscInt row,*cols,ncols; 2309 PetscScalar *mat_vals; 2310 2311 row = idxs_B[i]; 2312 ncols = xadj[i+1]-xadj[i]; 2313 cols = idxs_mapped+xadj[i]; 2314 mat_vals = vals+xadj[i]; 2315 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2316 } 2317 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2318 if (!done) { 2319 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2320 } 2321 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2322 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2323 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2324 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2325 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2326 2327 /* check */ 2328 if (pcbddc->dbg_flag) { 2329 PetscReal error; 2330 Vec x,x_change; 2331 2332 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2333 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2334 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2335 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2336 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2337 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2338 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2339 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2340 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2341 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2342 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2343 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2344 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2345 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2346 ierr = VecDestroy(&x);CHKERRQ(ierr); 2347 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2348 } 2349 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2350 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2351 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2352 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2353 } 2354 2355 /* set up change of basis context */ 2356 if (pcbddc->ChangeOfBasisMatrix) { 2357 PCBDDCChange_ctx change_ctx; 2358 2359 if (!pcbddc->new_global_mat) { 2360 PetscInt global_size,local_size; 2361 2362 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2363 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2364 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2365 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2366 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2367 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2368 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2369 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2370 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2371 } else { 2372 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2373 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2374 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2375 } 2376 if (!pcbddc->user_ChangeOfBasisMatrix) { 2377 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2378 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2379 } else { 2380 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2381 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2382 } 2383 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2384 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2385 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2386 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 } 2388 2389 /* get indices in local ordering for vertices and constraints */ 2390 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2391 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2392 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2393 } 2394 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2395 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2396 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2397 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2398 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2399 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2400 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2401 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2402 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2403 /* set quantities in PCBDDC data struct */ 2404 pcbddc->n_actual_vertices = i; 2405 /* check if a new primal space has been introduced */ 2406 pcbddc->new_primal_space_local = PETSC_TRUE; 2407 if (olocal_primal_size == pcbddc->local_primal_size) { 2408 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2409 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2410 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2411 } 2412 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2413 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2414 2415 /* flush dbg viewer */ 2416 if (pcbddc->dbg_flag) { 2417 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2418 } 2419 2420 /* free workspace */ 2421 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2422 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2423 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2424 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2425 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2426 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2432 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2433 { 2434 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2435 PC_IS *pcis = (PC_IS*)pc->data; 2436 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2437 PetscInt ierr,i,vertex_size; 2438 PetscViewer viewer=pcbddc->dbg_viewer; 2439 2440 PetscFunctionBegin; 2441 /* Reset previously computed graph */ 2442 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2443 /* Init local Graph struct */ 2444 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2445 2446 /* Check validity of the csr graph passed in by the user */ 2447 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2448 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2449 } 2450 2451 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2452 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2453 Mat mat_adj; 2454 PetscInt *xadj,*adjncy; 2455 PetscInt nvtxs; 2456 PetscBool flg_row=PETSC_TRUE; 2457 2458 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2459 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2460 if (!flg_row) { 2461 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2462 } 2463 if (pcbddc->use_local_adj) { 2464 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2465 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2466 } else { /* just compute subdomain's connected components */ 2467 IS is_dummy; 2468 ISLocalToGlobalMapping l2gmap_dummy; 2469 PetscInt j,sum; 2470 PetscInt *cxadj,*cadjncy; 2471 const PetscInt *idxs; 2472 PCBDDCGraph graph; 2473 PetscBT is_on_boundary; 2474 2475 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2476 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2477 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2478 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2479 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2480 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2481 graph->xadj = xadj; 2482 graph->adjncy = adjncy; 2483 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2484 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2485 2486 if (pcbddc->dbg_flag) { 2487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2488 for (i=0;i<graph->ncc;i++) { 2489 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2490 } 2491 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2492 } 2493 2494 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2495 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2496 for (i=0;i<pcis->n_B;i++) { 2497 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2498 } 2499 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2500 2501 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2502 sum = 0; 2503 for (i=0;i<graph->ncc;i++) { 2504 PetscInt sizecc = 0; 2505 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2506 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2507 sizecc++; 2508 } 2509 } 2510 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2511 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2512 cxadj[graph->queue[j]] = sizecc; 2513 } 2514 } 2515 sum += sizecc*sizecc; 2516 } 2517 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2518 sum = 0; 2519 for (i=0;i<nvtxs;i++) { 2520 PetscInt temp = cxadj[i]; 2521 cxadj[i] = sum; 2522 sum += temp; 2523 } 2524 cxadj[nvtxs] = sum; 2525 for (i=0;i<graph->ncc;i++) { 2526 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2527 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2528 PetscInt k,sizecc = 0; 2529 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2530 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2531 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2532 sizecc++; 2533 } 2534 } 2535 } 2536 } 2537 } 2538 if (nvtxs) { 2539 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2540 } else { 2541 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2542 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2543 } 2544 graph->xadj = 0; 2545 graph->adjncy = 0; 2546 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2547 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2548 } 2549 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2550 if (!flg_row) { 2551 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2552 } 2553 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2554 } 2555 2556 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2557 vertex_size = 1; 2558 if (pcbddc->user_provided_isfordofs) { 2559 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2560 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2561 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2562 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2563 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2564 } 2565 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2566 pcbddc->n_ISForDofs = 0; 2567 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2568 } 2569 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2570 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2571 } else { 2572 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2573 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2574 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2575 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2576 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2577 } 2578 } 2579 } 2580 2581 /* Setup of Graph */ 2582 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2583 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2584 } 2585 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2586 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2587 } 2588 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2589 2590 /* Graph's connected components analysis */ 2591 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2592 2593 /* print some info to stdout */ 2594 if (pcbddc->dbg_flag) { 2595 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2596 } 2597 2598 /* mark topography has done */ 2599 pcbddc->recompute_topography = PETSC_FALSE; 2600 PetscFunctionReturn(0); 2601 } 2602 2603 #undef __FUNCT__ 2604 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2605 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2606 { 2607 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2608 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2609 PetscErrorCode ierr; 2610 2611 PetscFunctionBegin; 2612 n = 0; 2613 vertices = 0; 2614 if (pcbddc->ConstraintMatrix) { 2615 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2616 for (i=0;i<local_primal_size;i++) { 2617 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2618 if (size_of_constraint == 1) n++; 2619 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2620 } 2621 if (vertices_idx) { 2622 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2623 n = 0; 2624 for (i=0;i<local_primal_size;i++) { 2625 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2626 if (size_of_constraint == 1) { 2627 vertices[n++]=row_cmat_indices[0]; 2628 } 2629 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2630 } 2631 } 2632 } 2633 *n_vertices = n; 2634 if (vertices_idx) *vertices_idx = vertices; 2635 PetscFunctionReturn(0); 2636 } 2637 2638 #undef __FUNCT__ 2639 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2640 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2641 { 2642 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2643 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2644 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2645 PetscBT touched; 2646 PetscErrorCode ierr; 2647 2648 /* This function assumes that the number of local constraints per connected component 2649 is not greater than the number of nodes defined for the connected component 2650 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2651 PetscFunctionBegin; 2652 n = 0; 2653 constraints_index = 0; 2654 if (pcbddc->ConstraintMatrix) { 2655 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2656 max_size_of_constraint = 0; 2657 for (i=0;i<local_primal_size;i++) { 2658 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2659 if (size_of_constraint > 1) { 2660 n++; 2661 } 2662 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2663 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2664 } 2665 if (constraints_idx) { 2666 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2667 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2668 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2669 n = 0; 2670 for (i=0;i<local_primal_size;i++) { 2671 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2672 if (size_of_constraint > 1) { 2673 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2674 /* find first untouched local node */ 2675 j = 0; 2676 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2677 min_index = row_cmat_global_indices[j]; 2678 min_loc = j; 2679 /* search the minimum among nodes not yet touched on the connected component 2680 since there can be more than one constraint on a single cc */ 2681 for (j=1;j<size_of_constraint;j++) { 2682 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2683 min_index = row_cmat_global_indices[j]; 2684 min_loc = j; 2685 } 2686 } 2687 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2688 constraints_index[n++] = row_cmat_indices[min_loc]; 2689 } 2690 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2691 } 2692 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2693 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2694 } 2695 } 2696 *n_constraints = n; 2697 if (constraints_idx) *constraints_idx = constraints_index; 2698 PetscFunctionReturn(0); 2699 } 2700 2701 #undef __FUNCT__ 2702 #define __FUNCT__ "PCBDDCSubsetNumbering" 2703 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2704 { 2705 Vec local_vec,global_vec; 2706 IS seqis,paris; 2707 VecScatter scatter_ctx; 2708 PetscScalar *array; 2709 PetscInt *temp_global_dofs; 2710 PetscScalar globalsum; 2711 PetscInt i,j,s; 2712 PetscInt nlocals,first_index,old_index,max_local; 2713 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2714 PetscMPIInt *dof_sizes,*dof_displs; 2715 PetscBool first_found; 2716 PetscErrorCode ierr; 2717 2718 PetscFunctionBegin; 2719 /* mpi buffers */ 2720 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2721 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2722 j = ( !rank_prec_comm ? size_prec_comm : 0); 2723 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2724 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2725 /* get maximum size of subset */ 2726 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2727 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2728 max_local = 0; 2729 for (i=0;i<n_local_dofs;i++) { 2730 if (max_local < temp_global_dofs[i] ) { 2731 max_local = temp_global_dofs[i]; 2732 } 2733 } 2734 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2735 max_global++; 2736 max_local = 0; 2737 for (i=0;i<n_local_dofs;i++) { 2738 if (max_local < local_dofs[i] ) { 2739 max_local = local_dofs[i]; 2740 } 2741 } 2742 max_local++; 2743 /* allocate workspace */ 2744 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2745 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2746 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2747 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2748 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2749 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2750 /* create scatter */ 2751 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2752 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2753 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2754 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2755 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2756 /* init array */ 2757 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2758 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2759 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2760 if (local_dofs_mult) { 2761 for (i=0;i<n_local_dofs;i++) { 2762 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2763 } 2764 } else { 2765 for (i=0;i<n_local_dofs;i++) { 2766 array[local_dofs[i]]=1.0; 2767 } 2768 } 2769 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2770 /* scatter into global vec and get total number of global dofs */ 2771 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2772 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2773 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2774 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2775 /* Fill global_vec with cumulative function for global numbering */ 2776 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2777 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2778 nlocals = 0; 2779 first_index = -1; 2780 first_found = PETSC_FALSE; 2781 for (i=0;i<s;i++) { 2782 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2783 first_found = PETSC_TRUE; 2784 first_index = i; 2785 } 2786 nlocals += (PetscInt)PetscRealPart(array[i]); 2787 } 2788 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2789 if (!rank_prec_comm) { 2790 dof_displs[0]=0; 2791 for (i=1;i<size_prec_comm;i++) { 2792 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2793 } 2794 } 2795 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2796 if (first_found) { 2797 array[first_index] += (PetscScalar)nlocals; 2798 old_index = first_index; 2799 for (i=first_index+1;i<s;i++) { 2800 if (PetscRealPart(array[i]) > 0.1) { 2801 array[i] += array[old_index]; 2802 old_index = i; 2803 } 2804 } 2805 } 2806 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2807 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2808 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2809 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2810 /* get global ordering of local dofs */ 2811 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2812 if (local_dofs_mult) { 2813 for (i=0;i<n_local_dofs;i++) { 2814 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2815 } 2816 } else { 2817 for (i=0;i<n_local_dofs;i++) { 2818 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2819 } 2820 } 2821 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2822 /* free workspace */ 2823 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2824 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2825 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2826 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2827 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2828 /* return pointer to global ordering of local dofs */ 2829 *global_numbering_subset = temp_global_dofs; 2830 PetscFunctionReturn(0); 2831 } 2832 2833 #undef __FUNCT__ 2834 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2835 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2836 { 2837 PetscInt i,j; 2838 PetscScalar *alphas; 2839 PetscErrorCode ierr; 2840 2841 PetscFunctionBegin; 2842 /* this implements stabilized Gram-Schmidt */ 2843 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2844 for (i=0;i<n;i++) { 2845 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2846 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2847 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2848 } 2849 ierr = PetscFree(alphas);CHKERRQ(ierr); 2850 PetscFunctionReturn(0); 2851 } 2852 2853 #undef __FUNCT__ 2854 #define __FUNCT__ "MatISGetSubassemblingPattern" 2855 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2856 { 2857 Mat subdomain_adj; 2858 IS new_ranks,ranks_send_to; 2859 MatPartitioning partitioner; 2860 Mat_IS *matis; 2861 PetscInt n_neighs,*neighs,*n_shared,**shared; 2862 PetscInt prank; 2863 PetscMPIInt size,rank,color; 2864 PetscInt *xadj,*adjncy,*oldranks; 2865 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2866 PetscInt i,local_size,threshold=0; 2867 PetscErrorCode ierr; 2868 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2869 PetscSubcomm subcomm; 2870 2871 PetscFunctionBegin; 2872 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2873 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2874 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2875 2876 /* Get info on mapping */ 2877 matis = (Mat_IS*)(mat->data); 2878 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2879 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2880 2881 /* build local CSR graph of subdomains' connectivity */ 2882 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2883 xadj[0] = 0; 2884 xadj[1] = PetscMax(n_neighs-1,0); 2885 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2886 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2887 2888 if (threshold) { 2889 PetscInt xadj_count = 0; 2890 for (i=1;i<n_neighs;i++) { 2891 if (n_shared[i] > threshold) { 2892 adjncy[xadj_count] = neighs[i]; 2893 adjncy_wgt[xadj_count] = n_shared[i]; 2894 xadj_count++; 2895 } 2896 } 2897 xadj[1] = xadj_count; 2898 } else { 2899 if (xadj[1]) { 2900 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2901 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2902 } 2903 } 2904 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2905 if (use_square) { 2906 for (i=0;i<xadj[1];i++) { 2907 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2908 } 2909 } 2910 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2911 2912 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 2913 2914 /* 2915 Restrict work on active processes only. 2916 */ 2917 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2918 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2919 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2920 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2921 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2922 if (color) { 2923 ierr = PetscFree(xadj);CHKERRQ(ierr); 2924 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2925 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2926 } else { 2927 PetscInt coarsening_ratio; 2928 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2929 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2930 prank = rank; 2931 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2932 /* 2933 for (i=0;i<size;i++) { 2934 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2935 } 2936 */ 2937 for (i=0;i<xadj[1];i++) { 2938 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2939 } 2940 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2941 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2942 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2943 2944 /* Partition */ 2945 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2946 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2947 if (use_vwgt) { 2948 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 2949 v_wgt[0] = local_size; 2950 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2951 } 2952 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2953 coarsening_ratio = size/n_subdomains; 2954 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2955 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2956 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2957 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2958 2959 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2960 if (contiguous) { 2961 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2962 } else { 2963 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2964 } 2965 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2966 /* clean up */ 2967 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2968 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2969 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2970 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2971 } 2972 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2973 2974 /* assemble parallel IS for sends */ 2975 i = 1; 2976 if (color) i=0; 2977 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2978 2979 /* get back IS */ 2980 *is_sends = ranks_send_to; 2981 PetscFunctionReturn(0); 2982 } 2983 2984 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2985 2986 #undef __FUNCT__ 2987 #define __FUNCT__ "MatISSubassemble" 2988 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2989 { 2990 Mat local_mat; 2991 Mat_IS *matis; 2992 IS is_sends_internal; 2993 PetscInt rows,cols,new_local_rows; 2994 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2995 PetscBool ismatis,isdense,newisdense,destroy_mat; 2996 ISLocalToGlobalMapping l2gmap; 2997 PetscInt* l2gmap_indices; 2998 const PetscInt* is_indices; 2999 MatType new_local_type; 3000 /* buffers */ 3001 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3002 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3003 PetscInt *recv_buffer_idxs_local; 3004 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3005 /* MPI */ 3006 MPI_Comm comm,comm_n; 3007 PetscSubcomm subcomm; 3008 PetscMPIInt n_sends,n_recvs,commsize; 3009 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3010 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3011 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3012 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3013 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3014 PetscErrorCode ierr; 3015 3016 PetscFunctionBegin; 3017 /* TODO: add missing checks */ 3018 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3019 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3020 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3021 PetscValidLogicalCollectiveInt(mat,nis,7); 3022 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3023 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3024 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3025 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3026 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3027 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3028 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3029 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3030 PetscInt mrows,mcols,mnrows,mncols; 3031 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3032 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3033 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3034 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3035 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3036 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3037 } 3038 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3039 PetscValidLogicalCollectiveInt(mat,bs,0); 3040 /* prepare IS for sending if not provided */ 3041 if (!is_sends) { 3042 PetscBool pcontig = PETSC_TRUE; 3043 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3044 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3045 } else { 3046 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3047 is_sends_internal = is_sends; 3048 } 3049 3050 /* get pointer of MATIS data */ 3051 matis = (Mat_IS*)mat->data; 3052 3053 /* get comm */ 3054 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3055 3056 /* compute number of sends */ 3057 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3058 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3059 3060 /* compute number of receives */ 3061 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3062 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3063 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3064 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3065 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3066 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3067 ierr = PetscFree(iflags);CHKERRQ(ierr); 3068 3069 /* restrict comm if requested */ 3070 subcomm = 0; 3071 destroy_mat = PETSC_FALSE; 3072 if (restrict_comm) { 3073 PetscMPIInt color,rank,subcommsize; 3074 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3075 color = 0; 3076 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3077 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3078 subcommsize = commsize - subcommsize; 3079 /* check if reuse has been requested */ 3080 if (reuse == MAT_REUSE_MATRIX) { 3081 if (*mat_n) { 3082 PetscMPIInt subcommsize2; 3083 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3084 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3085 comm_n = PetscObjectComm((PetscObject)*mat_n); 3086 } else { 3087 comm_n = PETSC_COMM_SELF; 3088 } 3089 } else { /* MAT_INITIAL_MATRIX */ 3090 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3091 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3092 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3093 comm_n = subcomm->comm; 3094 } 3095 /* flag to destroy *mat_n if not significative */ 3096 if (color) destroy_mat = PETSC_TRUE; 3097 } else { 3098 comm_n = comm; 3099 } 3100 3101 /* prepare send/receive buffers */ 3102 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3103 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3104 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3105 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3106 if (nis) { 3107 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3108 } 3109 3110 /* Get data from local matrices */ 3111 if (!isdense) { 3112 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3113 /* TODO: See below some guidelines on how to prepare the local buffers */ 3114 /* 3115 send_buffer_vals should contain the raw values of the local matrix 3116 send_buffer_idxs should contain: 3117 - MatType_PRIVATE type 3118 - PetscInt size_of_l2gmap 3119 - PetscInt global_row_indices[size_of_l2gmap] 3120 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3121 */ 3122 } else { 3123 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3124 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3125 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3126 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3127 send_buffer_idxs[1] = i; 3128 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3129 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3130 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3131 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3132 for (i=0;i<n_sends;i++) { 3133 ilengths_vals[is_indices[i]] = len*len; 3134 ilengths_idxs[is_indices[i]] = len+2; 3135 } 3136 } 3137 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3138 /* additional is (if any) */ 3139 if (nis) { 3140 PetscMPIInt psum; 3141 PetscInt j; 3142 for (j=0,psum=0;j<nis;j++) { 3143 PetscInt plen; 3144 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3145 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3146 psum += len+1; /* indices + lenght */ 3147 } 3148 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3149 for (j=0,psum=0;j<nis;j++) { 3150 PetscInt plen; 3151 const PetscInt *is_array_idxs; 3152 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3153 send_buffer_idxs_is[psum] = plen; 3154 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3155 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3156 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3157 psum += plen+1; /* indices + lenght */ 3158 } 3159 for (i=0;i<n_sends;i++) { 3160 ilengths_idxs_is[is_indices[i]] = psum; 3161 } 3162 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3163 } 3164 3165 buf_size_idxs = 0; 3166 buf_size_vals = 0; 3167 buf_size_idxs_is = 0; 3168 for (i=0;i<n_recvs;i++) { 3169 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3170 buf_size_vals += (PetscInt)olengths_vals[i]; 3171 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3172 } 3173 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3174 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3175 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3176 3177 /* get new tags for clean communications */ 3178 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3179 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3180 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3181 3182 /* allocate for requests */ 3183 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3184 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3185 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3186 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3187 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3188 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3189 3190 /* communications */ 3191 ptr_idxs = recv_buffer_idxs; 3192 ptr_vals = recv_buffer_vals; 3193 ptr_idxs_is = recv_buffer_idxs_is; 3194 for (i=0;i<n_recvs;i++) { 3195 source_dest = onodes[i]; 3196 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3197 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3198 ptr_idxs += olengths_idxs[i]; 3199 ptr_vals += olengths_vals[i]; 3200 if (nis) { 3201 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3202 ptr_idxs_is += olengths_idxs_is[i]; 3203 } 3204 } 3205 for (i=0;i<n_sends;i++) { 3206 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3207 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3208 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3209 if (nis) { 3210 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3211 } 3212 } 3213 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3214 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3215 3216 /* assemble new l2g map */ 3217 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3218 ptr_idxs = recv_buffer_idxs; 3219 new_local_rows = 0; 3220 for (i=0;i<n_recvs;i++) { 3221 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3222 ptr_idxs += olengths_idxs[i]; 3223 } 3224 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3225 ptr_idxs = recv_buffer_idxs; 3226 new_local_rows = 0; 3227 for (i=0;i<n_recvs;i++) { 3228 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3229 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3230 ptr_idxs += olengths_idxs[i]; 3231 } 3232 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3233 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3234 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3235 3236 /* infer new local matrix type from received local matrices type */ 3237 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3238 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3239 if (n_recvs) { 3240 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3241 ptr_idxs = recv_buffer_idxs; 3242 for (i=0;i<n_recvs;i++) { 3243 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3244 new_local_type_private = MATAIJ_PRIVATE; 3245 break; 3246 } 3247 ptr_idxs += olengths_idxs[i]; 3248 } 3249 switch (new_local_type_private) { 3250 case MATDENSE_PRIVATE: 3251 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3252 new_local_type = MATSEQAIJ; 3253 bs = 1; 3254 } else { /* if I receive only 1 dense matrix */ 3255 new_local_type = MATSEQDENSE; 3256 bs = 1; 3257 } 3258 break; 3259 case MATAIJ_PRIVATE: 3260 new_local_type = MATSEQAIJ; 3261 bs = 1; 3262 break; 3263 case MATBAIJ_PRIVATE: 3264 new_local_type = MATSEQBAIJ; 3265 break; 3266 case MATSBAIJ_PRIVATE: 3267 new_local_type = MATSEQSBAIJ; 3268 break; 3269 default: 3270 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3271 break; 3272 } 3273 } else { /* by default, new_local_type is seqdense */ 3274 new_local_type = MATSEQDENSE; 3275 bs = 1; 3276 } 3277 3278 /* create MATIS object if needed */ 3279 if (reuse == MAT_INITIAL_MATRIX) { 3280 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3281 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3282 } else { 3283 /* it also destroys the local matrices */ 3284 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3285 } 3286 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3287 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3288 3289 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3290 3291 /* Global to local map of received indices */ 3292 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3293 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3294 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3295 3296 /* restore attributes -> type of incoming data and its size */ 3297 buf_size_idxs = 0; 3298 for (i=0;i<n_recvs;i++) { 3299 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3300 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3301 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3302 } 3303 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3304 3305 /* set preallocation */ 3306 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3307 if (!newisdense) { 3308 PetscInt *new_local_nnz=0; 3309 3310 ptr_vals = recv_buffer_vals; 3311 ptr_idxs = recv_buffer_idxs_local; 3312 if (n_recvs) { 3313 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3314 } 3315 for (i=0;i<n_recvs;i++) { 3316 PetscInt j; 3317 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3318 for (j=0;j<*(ptr_idxs+1);j++) { 3319 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3320 } 3321 } else { 3322 /* TODO */ 3323 } 3324 ptr_idxs += olengths_idxs[i]; 3325 } 3326 if (new_local_nnz) { 3327 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3328 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3329 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3330 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3331 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3332 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3333 } else { 3334 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3335 } 3336 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3337 } else { 3338 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3339 } 3340 3341 /* set values */ 3342 ptr_vals = recv_buffer_vals; 3343 ptr_idxs = recv_buffer_idxs_local; 3344 for (i=0;i<n_recvs;i++) { 3345 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3346 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3347 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3348 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3349 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3350 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3351 } else { 3352 /* TODO */ 3353 } 3354 ptr_idxs += olengths_idxs[i]; 3355 ptr_vals += olengths_vals[i]; 3356 } 3357 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3358 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3359 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3360 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3361 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3362 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3363 3364 #if 0 3365 if (!restrict_comm) { /* check */ 3366 Vec lvec,rvec; 3367 PetscReal infty_error; 3368 3369 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3370 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3371 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3372 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3373 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3374 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3375 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3376 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3377 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3378 } 3379 #endif 3380 3381 /* assemble new additional is (if any) */ 3382 if (nis) { 3383 PetscInt **temp_idxs,*count_is,j,psum; 3384 3385 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3386 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3387 ptr_idxs = recv_buffer_idxs_is; 3388 psum = 0; 3389 for (i=0;i<n_recvs;i++) { 3390 for (j=0;j<nis;j++) { 3391 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3392 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3393 psum += plen; 3394 ptr_idxs += plen+1; /* shift pointer to received data */ 3395 } 3396 } 3397 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3398 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3399 for (i=1;i<nis;i++) { 3400 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3401 } 3402 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3403 ptr_idxs = recv_buffer_idxs_is; 3404 for (i=0;i<n_recvs;i++) { 3405 for (j=0;j<nis;j++) { 3406 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3407 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3408 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3409 ptr_idxs += plen+1; /* shift pointer to received data */ 3410 } 3411 } 3412 for (i=0;i<nis;i++) { 3413 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3414 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3415 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3416 } 3417 ierr = PetscFree(count_is);CHKERRQ(ierr); 3418 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3419 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3420 } 3421 /* free workspace */ 3422 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3423 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3424 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3425 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3426 if (isdense) { 3427 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3428 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3429 } else { 3430 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3431 } 3432 if (nis) { 3433 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3434 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3435 } 3436 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3437 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3438 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3439 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3440 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3441 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3442 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3443 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3444 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3445 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3446 ierr = PetscFree(onodes);CHKERRQ(ierr); 3447 if (nis) { 3448 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3449 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3450 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3451 } 3452 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3453 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3454 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3455 for (i=0;i<nis;i++) { 3456 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3457 } 3458 } 3459 PetscFunctionReturn(0); 3460 } 3461 3462 /* temporary hack into ksp private data structure */ 3463 #include <petsc-private/kspimpl.h> 3464 3465 #undef __FUNCT__ 3466 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3467 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3468 { 3469 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3470 PC_IS *pcis = (PC_IS*)pc->data; 3471 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3472 MatNullSpace CoarseNullSpace=NULL; 3473 ISLocalToGlobalMapping coarse_islg; 3474 IS coarse_is,*isarray; 3475 PetscInt i,im_active=-1,active_procs=-1; 3476 PetscInt nis,nisdofs,nisneu; 3477 PC pc_temp; 3478 PCType coarse_pc_type; 3479 KSPType coarse_ksp_type; 3480 PetscBool multilevel_requested,multilevel_allowed; 3481 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3482 Mat t_coarse_mat_is; 3483 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3484 PetscMPIInt all_procs; 3485 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3486 PetscBool compute_vecs = PETSC_FALSE; 3487 PetscScalar *array; 3488 PetscErrorCode ierr; 3489 3490 PetscFunctionBegin; 3491 /* Assign global numbering to coarse dofs */ 3492 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3493 compute_vecs = PETSC_TRUE; 3494 PetscInt ocoarse_size; 3495 ocoarse_size = pcbddc->coarse_size; 3496 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3497 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3498 /* see if we can avoid some work */ 3499 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3500 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3501 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3502 coarse_reuse = PETSC_FALSE; 3503 } else { /* we can safely reuse already computed coarse matrix */ 3504 coarse_reuse = PETSC_TRUE; 3505 } 3506 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3507 coarse_reuse = PETSC_FALSE; 3508 } 3509 /* reset any subassembling information */ 3510 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3511 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3512 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3513 coarse_reuse = PETSC_TRUE; 3514 } 3515 3516 /* count "active" (i.e. with positive local size) and "void" processes */ 3517 im_active = !!(pcis->n); 3518 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3519 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3520 void_procs = all_procs-active_procs; 3521 csin_type_simple = PETSC_TRUE; 3522 redist = PETSC_FALSE; 3523 if (pcbddc->current_level && void_procs) { 3524 csin_ml = PETSC_TRUE; 3525 ncoarse_ml = void_procs; 3526 csin_ds = PETSC_TRUE; 3527 ncoarse_ds = void_procs; 3528 } else { 3529 csin_ml = PETSC_FALSE; 3530 ncoarse_ml = all_procs; 3531 if (void_procs) { 3532 csin_ds = PETSC_TRUE; 3533 ncoarse_ds = void_procs; 3534 csin_type_simple = PETSC_FALSE; 3535 } else { 3536 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3537 csin_ds = PETSC_TRUE; 3538 ncoarse_ds = pcbddc->redistribute_coarse; 3539 redist = PETSC_TRUE; 3540 } else { 3541 csin_ds = PETSC_FALSE; 3542 ncoarse_ds = all_procs; 3543 } 3544 } 3545 } 3546 3547 /* 3548 test if we can go multilevel: three conditions must be satisfied: 3549 - we have not exceeded the number of levels requested 3550 - we can actually subassemble the active processes 3551 - we can find a suitable number of MPI processes where we can place the subassembled problem 3552 */ 3553 multilevel_allowed = PETSC_FALSE; 3554 multilevel_requested = PETSC_FALSE; 3555 if (pcbddc->current_level < pcbddc->max_levels) { 3556 multilevel_requested = PETSC_TRUE; 3557 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3558 multilevel_allowed = PETSC_FALSE; 3559 } else { 3560 multilevel_allowed = PETSC_TRUE; 3561 } 3562 } 3563 /* determine number of process partecipating to coarse solver */ 3564 if (multilevel_allowed) { 3565 ncoarse = ncoarse_ml; 3566 csin = csin_ml; 3567 } else { 3568 ncoarse = ncoarse_ds; 3569 csin = csin_ds; 3570 } 3571 3572 /* creates temporary l2gmap and IS for coarse indexes */ 3573 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3574 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3575 3576 /* creates temporary MATIS object for coarse matrix */ 3577 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3578 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3579 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3580 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3581 #if 0 3582 { 3583 PetscViewer viewer; 3584 char filename[256]; 3585 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3586 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3587 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3588 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3589 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3590 } 3591 #endif 3592 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3593 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3594 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3595 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3596 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3597 3598 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3599 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3600 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3601 const PetscInt *idxs; 3602 ISLocalToGlobalMapping tmap; 3603 3604 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3605 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3606 /* allocate space for temporary storage */ 3607 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3608 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3609 /* allocate for IS array */ 3610 nisdofs = pcbddc->n_ISForDofsLocal; 3611 nisneu = !!pcbddc->NeumannBoundariesLocal; 3612 nis = nisdofs + nisneu; 3613 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3614 /* dofs splitting */ 3615 for (i=0;i<nisdofs;i++) { 3616 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3617 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3618 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3619 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3620 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3621 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3622 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3623 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3624 } 3625 /* neumann boundaries */ 3626 if (pcbddc->NeumannBoundariesLocal) { 3627 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3628 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3629 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3630 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3631 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3632 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3633 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3634 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3635 } 3636 /* free memory */ 3637 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3638 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3639 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3640 } else { 3641 nis = 0; 3642 nisdofs = 0; 3643 nisneu = 0; 3644 isarray = NULL; 3645 } 3646 /* destroy no longer needed map */ 3647 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3648 3649 /* restrict on coarse candidates (if needed) */ 3650 coarse_mat_is = NULL; 3651 if (csin) { 3652 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3653 if (redist) { 3654 PetscMPIInt rank; 3655 PetscInt spc,n_spc_p1,dest[1]; 3656 3657 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3658 spc = all_procs/pcbddc->redistribute_coarse; 3659 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 3660 if (rank > n_spc_p1*(spc+1)-1) { 3661 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3662 } else { 3663 dest[0] = rank/(spc+1); 3664 } 3665 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3666 } else { 3667 PetscInt j,tissize,*nisindices; 3668 PetscInt *coarse_candidates; 3669 const PetscInt* tisindices; 3670 /* get coarse candidates' ranks in pc communicator */ 3671 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 3672 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3673 for (i=0,j=0;i<all_procs;i++) { 3674 if (!coarse_candidates[i]) { 3675 coarse_candidates[j]=i; 3676 j++; 3677 } 3678 } 3679 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3680 /* get a suitable subassembling pattern */ 3681 if (csin_type_simple) { 3682 PetscMPIInt rank; 3683 PetscInt issize,isidx; 3684 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3685 if (im_active) { 3686 issize = 1; 3687 isidx = (PetscInt)rank; 3688 } else { 3689 issize = 0; 3690 isidx = -1; 3691 } 3692 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3693 } else { 3694 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3695 } 3696 if (pcbddc->dbg_flag) { 3697 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3698 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3699 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3700 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3701 for (i=0;i<j;i++) { 3702 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3703 } 3704 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3705 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3706 } 3707 /* shift the pattern on coarse candidates */ 3708 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3709 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3710 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 3711 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3712 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3713 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3714 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3715 } 3716 } 3717 if (pcbddc->dbg_flag) { 3718 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3719 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3720 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3721 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3722 } 3723 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3724 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3725 } else { 3726 if (pcbddc->dbg_flag) { 3727 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3728 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3729 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3730 } 3731 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3732 coarse_mat_is = t_coarse_mat_is; 3733 } 3734 3735 /* create local to global scatters for coarse problem */ 3736 if (compute_vecs) { 3737 PetscInt lrows; 3738 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3739 if (coarse_mat_is) { 3740 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3741 } else { 3742 lrows = 0; 3743 } 3744 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3745 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3746 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3747 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3748 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3749 } 3750 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3751 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3752 3753 /* set defaults for coarse KSP and PC */ 3754 if (multilevel_allowed) { 3755 coarse_ksp_type = KSPRICHARDSON; 3756 coarse_pc_type = PCBDDC; 3757 } else { 3758 coarse_ksp_type = KSPPREONLY; 3759 coarse_pc_type = PCREDUNDANT; 3760 } 3761 3762 /* print some info if requested */ 3763 if (pcbddc->dbg_flag) { 3764 if (!multilevel_allowed) { 3765 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3766 if (multilevel_requested) { 3767 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3768 } else if (pcbddc->max_levels) { 3769 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3770 } 3771 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3772 } 3773 } 3774 3775 /* create the coarse KSP object only once with defaults */ 3776 if (coarse_mat_is) { 3777 MatReuse coarse_mat_reuse; 3778 PetscViewer dbg_viewer = NULL; 3779 if (pcbddc->dbg_flag) { 3780 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3781 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3782 } 3783 if (!pcbddc->coarse_ksp) { 3784 char prefix[256],str_level[16]; 3785 size_t len; 3786 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3787 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3788 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3789 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3790 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3791 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3792 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3793 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3794 /* prefix */ 3795 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3796 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3797 if (!pcbddc->current_level) { 3798 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3799 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3800 } else { 3801 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3802 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3803 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3804 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3805 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3806 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3807 } 3808 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3809 /* allow user customization */ 3810 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3811 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3812 } 3813 3814 /* get some info after set from options */ 3815 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3816 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3817 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3818 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3819 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3820 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3821 isbddc = PETSC_FALSE; 3822 } 3823 if (isredundant) { 3824 KSP inner_ksp; 3825 PC inner_pc; 3826 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3827 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3828 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3829 } 3830 3831 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3832 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3833 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3834 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3835 if (nisdofs) { 3836 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3837 for (i=0;i<nisdofs;i++) { 3838 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3839 } 3840 } 3841 if (nisneu) { 3842 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3843 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3844 } 3845 3846 /* assemble coarse matrix */ 3847 if (coarse_reuse) { 3848 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3849 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3850 coarse_mat_reuse = MAT_REUSE_MATRIX; 3851 } else { 3852 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3853 } 3854 if (isbddc || isnn) { 3855 if (pcbddc->coarsening_ratio > 1) { 3856 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3857 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3858 if (pcbddc->dbg_flag) { 3859 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3860 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3861 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3862 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3863 } 3864 } 3865 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3866 } else { 3867 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 3868 coarse_mat = coarse_mat_is; 3869 } 3870 } else { 3871 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3872 } 3873 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3874 3875 /* propagate symmetry info to coarse matrix */ 3876 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3877 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3878 3879 /* set operators */ 3880 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3881 if (pcbddc->dbg_flag) { 3882 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3883 } 3884 } else { /* processes non partecipating to coarse solver (if any) */ 3885 coarse_mat = 0; 3886 } 3887 ierr = PetscFree(isarray);CHKERRQ(ierr); 3888 #if 0 3889 { 3890 PetscViewer viewer; 3891 char filename[256]; 3892 sprintf(filename,"coarse_mat.m"); 3893 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3894 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3895 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3896 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3897 } 3898 #endif 3899 3900 /* Compute coarse null space (special handling by BDDC only) */ 3901 if (pcbddc->NullSpace) { 3902 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3903 } 3904 3905 if (pcbddc->coarse_ksp) { 3906 Vec crhs,csol; 3907 PetscBool ispreonly; 3908 if (CoarseNullSpace) { 3909 if (isbddc) { 3910 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3911 } else { 3912 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3913 } 3914 } 3915 /* setup coarse ksp */ 3916 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3917 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3918 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3919 /* hack */ 3920 if (!csol) { 3921 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3922 } 3923 if (!crhs) { 3924 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3925 } 3926 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3927 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3928 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3929 KSP check_ksp; 3930 KSPType check_ksp_type; 3931 PC check_pc; 3932 Vec check_vec,coarse_vec; 3933 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3934 PetscInt its; 3935 PetscBool compute_eigs; 3936 PetscReal *eigs_r,*eigs_c; 3937 PetscInt neigs; 3938 const char *prefix; 3939 3940 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3941 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3942 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3943 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3944 if (ispreonly) { 3945 check_ksp_type = KSPPREONLY; 3946 compute_eigs = PETSC_FALSE; 3947 } else { 3948 check_ksp_type = KSPGMRES; 3949 compute_eigs = PETSC_TRUE; 3950 } 3951 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3952 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3953 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3954 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3955 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3956 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3957 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3958 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3959 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3960 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3961 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3962 /* create random vec */ 3963 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3964 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3965 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3966 if (CoarseNullSpace) { 3967 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3968 } 3969 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3970 /* solve coarse problem */ 3971 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3972 if (CoarseNullSpace) { 3973 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3974 } 3975 /* set eigenvalue estimation if preonly has not been requested */ 3976 if (compute_eigs) { 3977 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 3978 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 3979 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3980 lambda_max = eigs_r[neigs-1]; 3981 lambda_min = eigs_r[0]; 3982 if (pcbddc->use_coarse_estimates) { 3983 if (lambda_max>lambda_min) { 3984 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3985 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3986 } 3987 } 3988 } 3989 3990 /* check coarse problem residual error */ 3991 if (pcbddc->dbg_flag) { 3992 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3993 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3994 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3995 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3996 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3997 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3998 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3999 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4000 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4001 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4002 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4003 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4004 if (compute_eigs) { 4005 PetscReal lambda_max_s,lambda_min_s; 4006 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4007 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4008 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4009 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4010 for (i=0;i<neigs;i++) { 4011 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4012 } 4013 } 4014 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4015 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4016 } 4017 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4018 if (compute_eigs) { 4019 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4020 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4021 } 4022 } 4023 } 4024 /* print additional info */ 4025 if (pcbddc->dbg_flag) { 4026 /* waits until all processes reaches this point */ 4027 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4028 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4029 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4030 } 4031 4032 /* free memory */ 4033 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4034 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4035 PetscFunctionReturn(0); 4036 } 4037 4038 #undef __FUNCT__ 4039 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4040 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4041 { 4042 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4043 PC_IS* pcis = (PC_IS*)pc->data; 4044 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4045 PetscInt i,coarse_size; 4046 PetscInt *local_primal_indices; 4047 PetscErrorCode ierr; 4048 4049 PetscFunctionBegin; 4050 /* Compute global number of coarse dofs */ 4051 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4052 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4053 } 4054 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4055 4056 /* check numbering */ 4057 if (pcbddc->dbg_flag) { 4058 PetscScalar coarsesum,*array; 4059 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4060 4061 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4062 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4064 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4065 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4066 for (i=0;i<pcbddc->local_primal_size;i++) { 4067 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4068 } 4069 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4070 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4071 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4072 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4073 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4074 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4075 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4076 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4077 for (i=0;i<pcis->n;i++) { 4078 if (array[i] == 1.0) { 4079 set_error = PETSC_TRUE; 4080 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4081 } 4082 } 4083 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4084 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4085 for (i=0;i<pcis->n;i++) { 4086 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4087 } 4088 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4089 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4090 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4091 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4092 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4093 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4094 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4095 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4096 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4097 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4098 for (i=0;i<pcbddc->local_primal_size;i++) { 4099 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4100 } 4101 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4102 } 4103 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4104 if (set_error_reduced) { 4105 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4106 } 4107 } 4108 /* get back data */ 4109 *coarse_size_n = coarse_size; 4110 *local_primal_indices_n = local_primal_indices; 4111 PetscFunctionReturn(0); 4112 } 4113 4114 #undef __FUNCT__ 4115 #define __FUNCT__ "PCBDDCGlobalToLocal" 4116 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4117 { 4118 IS localis_t; 4119 PetscInt i,lsize,*idxs,n; 4120 PetscScalar *vals; 4121 PetscErrorCode ierr; 4122 4123 PetscFunctionBegin; 4124 /* get indices in local ordering exploiting local to global map */ 4125 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4126 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4127 for (i=0;i<lsize;i++) vals[i] = 1.0; 4128 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4129 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4130 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4131 if (idxs) { /* multilevel guard */ 4132 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4133 } 4134 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4135 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4136 ierr = PetscFree(vals);CHKERRQ(ierr); 4137 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4138 /* now compute set in local ordering */ 4139 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4140 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4141 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4142 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4143 for (i=0,lsize=0;i<n;i++) { 4144 if (PetscRealPart(vals[i]) > 0.5) { 4145 lsize++; 4146 } 4147 } 4148 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4149 for (i=0,lsize=0;i<n;i++) { 4150 if (PetscRealPart(vals[i]) > 0.5) { 4151 idxs[lsize++] = i; 4152 } 4153 } 4154 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4155 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4156 *localis = localis_t; 4157 PetscFunctionReturn(0); 4158 } 4159 4160 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4161 #undef __FUNCT__ 4162 #define __FUNCT__ "PCBDDCMatMult_Private" 4163 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4164 { 4165 PCBDDCChange_ctx change_ctx; 4166 PetscErrorCode ierr; 4167 4168 PetscFunctionBegin; 4169 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4170 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4171 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4172 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4173 PetscFunctionReturn(0); 4174 } 4175 4176 #undef __FUNCT__ 4177 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4178 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4179 { 4180 PCBDDCChange_ctx change_ctx; 4181 PetscErrorCode ierr; 4182 4183 PetscFunctionBegin; 4184 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4185 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4186 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4187 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4188 PetscFunctionReturn(0); 4189 } 4190