1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCSetUpSolvers" 10 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscScalar *coarse_submat_vals; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 /* Compute matrix after change of basis and extract local submatrices */ 18 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D */ 21 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 22 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 23 24 /* Setup local solvers ksp_D and ksp_R */ 25 /* PCBDDCSetUpLocalScatters should be called first! */ 26 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 27 28 /* Change global null space passed in by the user if change of basis has been requested */ 29 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 30 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 31 } 32 33 /* 34 Setup local correction and local part of coarse basis. 35 Gives back the dense local part of the coarse matrix in column major ordering 36 */ 37 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 38 39 /* Compute total number of coarse nodes and setup coarse solver */ 40 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 41 42 /* free */ 43 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 #undef __FUNCT__ 48 #define __FUNCT__ "PCBDDCResetCustomization" 49 PetscErrorCode PCBDDCResetCustomization(PC pc) 50 { 51 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 61 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 62 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 63 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 64 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 65 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCResetTopography" 71 PetscErrorCode PCBDDCResetTopography(PC pc) 72 { 73 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 78 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 79 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 80 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "PCBDDCResetSolvers" 86 PetscErrorCode PCBDDCResetSolvers(PC pc) 87 { 88 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 93 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 101 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 109 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 110 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 111 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 112 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 113 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 114 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 115 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 122 { 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PC_IS *pcis = (PC_IS*)pc->data; 125 VecType impVecType; 126 PetscInt n_constraints,n_R,old_size; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 if (!pcbddc->ConstraintMatrix) { 131 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 132 } 133 /* get sizes */ 134 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 135 n_R = pcis->n-pcbddc->n_actual_vertices; 136 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 137 /* local work vectors (try to avoid unneeded work)*/ 138 /* R nodes */ 139 old_size = -1; 140 if (pcbddc->vec1_R) { 141 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 142 } 143 if (n_R != old_size) { 144 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 146 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 147 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 148 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 149 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 150 } 151 /* local primal dofs */ 152 old_size = -1; 153 if (pcbddc->vec1_P) { 154 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 155 } 156 if (pcbddc->local_primal_size != old_size) { 157 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 158 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 159 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 160 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 161 } 162 /* local explicit constraints */ 163 old_size = -1; 164 if (pcbddc->vec1_C) { 165 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 166 } 167 if (n_constraints && n_constraints != old_size) { 168 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 169 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 170 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 171 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCBDDCSetUpCorrection" 178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 179 { 180 PetscErrorCode ierr; 181 /* pointers to pcis and pcbddc */ 182 PC_IS* pcis = (PC_IS*)pc->data; 183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 184 /* submatrices of local problem */ 185 Mat A_RV,A_VR,A_VV; 186 /* working matrices */ 187 Mat M1,M2,M3,C_CR; 188 /* working vectors */ 189 Vec vec1_C,vec2_C,vec1_V,vec2_V; 190 /* additional working stuff */ 191 IS is_aux; 192 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 193 const PetscScalar *array,*row_cmat_values; 194 const PetscInt *row_cmat_indices,*idx_R_local; 195 PetscInt *idx_V_B,*auxindices; 196 PetscInt n_vertices,n_constraints,size_of_constraint; 197 PetscInt i,j,n_R,n_D,n_B; 198 PetscBool unsymmetric_check; 199 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 200 MatType impMatType; 201 /* some shortcuts to scalars */ 202 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 203 /* for debugging purposes */ 204 PetscReal *coarsefunctions_errors,*constraints_errors; 205 206 PetscFunctionBegin; 207 /* get number of vertices (corners plus constraints with change of basis) 208 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 209 n_vertices = pcbddc->n_actual_vertices; 210 n_constraints = pcbddc->local_primal_size-n_vertices; 211 /* Set Non-overlapping dimensions */ 212 n_B = pcis->n_B; n_D = pcis->n - n_B; 213 n_R = pcis->n-n_vertices; 214 215 /* Set types for local objects needed by BDDC precondtioner */ 216 impMatType = MATSEQDENSE; 217 218 /* Allocating some extra storage just to be safe */ 219 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 220 for (i=0;i<pcis->n;i++) auxindices[i]=i; 221 222 /* vertices in boundary numbering */ 223 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 224 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 225 if (i != n_vertices) { 226 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 227 } 228 229 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 230 if (n_constraints) { 231 /* see if we can save some allocations */ 232 if (pcbddc->local_auxmat2) { 233 PetscInt on_R,on_constraints; 234 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 235 if (on_R != n_R || on_constraints != n_constraints) { 236 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 237 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 238 } 239 } 240 /* work vectors */ 241 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 242 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 243 /* auxiliary matrices */ 244 if (!pcbddc->local_auxmat2) { 245 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 246 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 247 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 249 } 250 251 /* Extract constraints on R nodes: C_{CR} */ 252 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 253 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 254 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 255 256 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 257 for (i=0;i<n_constraints;i++) { 258 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 259 /* Get row of constraint matrix in R numbering */ 260 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 262 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 263 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 264 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 265 /* Solve for row of constraint matrix in R numbering */ 266 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 267 /* Set values in local_auxmat2 */ 268 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 270 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 271 } 272 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 275 276 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 277 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 278 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 279 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 280 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 281 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 282 ierr = MatSetUp(M1);CHKERRQ(ierr); 283 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 284 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 285 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 286 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 287 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 288 ierr = MatDestroy(&M2);CHKERRQ(ierr); 289 ierr = MatDestroy(&M3);CHKERRQ(ierr); 290 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 291 if (!pcbddc->local_auxmat1) { 292 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } else { 294 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 295 } 296 } 297 298 /* Get submatrices from subdomain matrix */ 299 if (n_vertices) { 300 PetscInt ibs,mbs; 301 PetscBool issbaij; 302 Mat newmat; 303 304 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 305 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 306 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 307 if (ibs != mbs) { /* need to convert to SEQAIJ */ 308 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 310 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 311 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 313 } else { 314 /* this is safe */ 315 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 316 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 317 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 318 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 319 /* which of the two approaches is faster? */ 320 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 322 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 324 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 325 } else { 326 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 327 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 328 } 329 } 330 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 331 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 332 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 333 } 334 335 /* Matrix of coarse basis functions (local) */ 336 if (pcbddc->coarse_phi_B) { 337 PetscInt on_B,on_primal; 338 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 339 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 340 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 341 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 342 } 343 } 344 if (pcbddc->coarse_phi_D) { 345 PetscInt on_D,on_primal; 346 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 347 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 348 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 349 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 350 } 351 } 352 if (!pcbddc->coarse_phi_B) { 353 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 355 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 356 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 357 } 358 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 359 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 361 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 362 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 363 } 364 365 if (pcbddc->dbg_flag) { 366 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 367 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 368 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 369 } 370 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 371 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 372 373 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 374 375 /* vertices */ 376 for (i=0;i<n_vertices;i++) { 377 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 378 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 379 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 380 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 381 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 382 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 383 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 384 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 387 if (n_constraints) { 388 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 389 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 390 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 391 } 392 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 393 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 394 395 /* Set values in coarse basis function and subdomain part of coarse_mat */ 396 /* coarse basis functions */ 397 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 398 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 399 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 402 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 403 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 404 if (pcbddc->switch_static || pcbddc->dbg_flag) { 405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 } 411 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 412 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 415 if (n_constraints) { 416 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 419 } 420 421 /* check */ 422 if (pcbddc->dbg_flag) { 423 /* assemble subdomain vector on local nodes */ 424 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 425 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 428 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 430 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 431 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 432 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 433 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 if (n_constraints) { 437 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 438 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 } 441 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 442 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 443 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 444 /* check saddle point solution */ 445 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 446 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 447 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 448 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 449 /* shift by the identity matrix */ 450 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 451 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 453 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 454 } 455 } 456 457 /* constraints */ 458 for (i=0;i<n_constraints;i++) { 459 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 460 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 462 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 463 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 464 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 465 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 466 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 467 if (n_vertices) { 468 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 469 } 470 /* Set values in coarse basis function and subdomain part of coarse_mat */ 471 /* coarse basis functions */ 472 j = i+n_vertices; /* don't touch this! */ 473 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 474 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 475 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 477 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 478 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 481 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 482 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 483 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 484 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 485 } 486 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 487 if (n_vertices) { 488 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 491 } 492 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 493 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 494 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 495 496 if (pcbddc->dbg_flag) { 497 /* assemble subdomain vector on nodes */ 498 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 499 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 500 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 501 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 502 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 503 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 504 /* assemble subdomain vector of lagrange multipliers */ 505 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 506 if (n_vertices) { 507 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 508 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 510 } 511 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 512 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 514 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 515 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 516 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 517 /* check saddle point solution */ 518 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 519 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 520 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 521 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 522 /* shift by the identity matrix */ 523 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 524 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 525 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 526 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 527 } 528 } 529 /* call assembling routines for local coarse basis */ 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 if (pcbddc->switch_static || pcbddc->dbg_flag) { 533 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 535 } 536 537 /* compute other basis functions for non-symmetric problems */ 538 ierr = MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);CHKERRQ(ierr); 539 if (!pcbddc->issym) { 540 if (!pcbddc->coarse_psi_B) { 541 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 542 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 543 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 544 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 545 } 546 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 547 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 548 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 549 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 550 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 551 } 552 for (i=0;i<pcbddc->local_primal_size;i++) { 553 if (n_constraints) { 554 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 555 for (j=0;j<n_constraints;j++) { 556 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 557 } 558 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 559 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 560 } 561 if (i<n_vertices) { 562 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 563 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 564 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 565 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 566 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 567 if (n_constraints) { 568 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 569 } 570 } else { 571 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 572 } 573 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 574 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 575 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 576 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 577 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 578 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 579 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 580 if (i<n_vertices) { 581 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 582 } 583 if (pcbddc->switch_static || pcbddc->dbg_flag) { 584 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 585 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 586 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 587 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 588 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 589 } 590 591 if (pcbddc->dbg_flag) { 592 /* assemble subdomain vector on nodes */ 593 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 594 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 595 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 596 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 597 if (i<n_vertices) { 598 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 599 } 600 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 601 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 602 /* assemble subdomain vector of lagrange multipliers */ 603 for (j=0;j<pcbddc->local_primal_size;j++) { 604 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 605 } 606 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 607 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 608 /* check saddle point solution */ 609 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 610 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 611 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 612 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 613 /* shift by the identity matrix */ 614 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 615 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 616 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 617 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 618 } 619 } 620 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 if (pcbddc->switch_static || pcbddc->dbg_flag) { 623 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 624 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 625 } 626 unsymmetric_check = PETSC_TRUE; 627 } else { /* take references to already computed coarse basis */ 628 unsymmetric_check = PETSC_FALSE; 629 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 630 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 631 if (pcbddc->coarse_phi_D) { 632 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 633 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 634 } 635 } 636 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 637 /* Checking coarse_sub_mat and coarse basis functios */ 638 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 639 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 640 if (pcbddc->dbg_flag) { 641 Mat coarse_sub_mat; 642 Mat AUXMAT,TM1,TM2,TM3,TM4; 643 Mat coarse_phi_D,coarse_phi_B; 644 Mat coarse_psi_D,coarse_psi_B; 645 Mat A_II,A_BB,A_IB,A_BI; 646 MatType checkmattype=MATSEQAIJ; 647 PetscReal real_value; 648 649 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 650 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 651 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 652 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 653 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 654 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 655 if (unsymmetric_check) { 656 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 657 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 658 } 659 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 660 661 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 662 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 663 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 664 if (unsymmetric_check) { 665 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 666 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 667 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 668 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 669 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 671 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 675 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 676 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 677 } else { 678 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 679 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 680 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 684 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 685 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 686 } 687 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 688 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 689 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 690 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 691 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 692 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 698 for (i=0;i<pcbddc->local_primal_size;i++) { 699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 700 } 701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 702 for (i=0;i<pcbddc->local_primal_size;i++) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 704 } 705 if (unsymmetric_check) { 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 707 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 709 } 710 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 711 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 713 } 714 } 715 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 716 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 717 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 718 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 719 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 720 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 721 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 722 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 723 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 724 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 725 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 726 if (unsymmetric_check) { 727 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 728 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 729 } 730 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 731 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 732 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 733 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 734 } 735 /* free memory */ 736 if (n_vertices) { 737 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 738 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 739 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 740 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 741 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 742 } 743 if (n_constraints) { 744 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 745 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 746 ierr = MatDestroy(&M1);CHKERRQ(ierr); 747 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 748 } 749 ierr = PetscFree(auxindices);CHKERRQ(ierr); 750 /* get back data */ 751 *coarse_submat_vals_n = coarse_submat_vals; 752 PetscFunctionReturn(0); 753 } 754 755 #undef __FUNCT__ 756 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 757 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 758 { 759 PC_IS* pcis = (PC_IS*)(pc->data); 760 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 761 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 762 PetscBool issbaij,isseqaij; 763 /* manage repeated solves */ 764 MatReuse reuse; 765 PetscErrorCode ierr; 766 767 PetscFunctionBegin; 768 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 769 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 770 } 771 /* get mat flags */ 772 reuse = MAT_INITIAL_MATRIX; 773 if (pc->setupcalled) { 774 if (pc->flag == SAME_NONZERO_PATTERN) { 775 reuse = MAT_REUSE_MATRIX; 776 } else { 777 reuse = MAT_INITIAL_MATRIX; 778 } 779 } 780 if (reuse == MAT_INITIAL_MATRIX) { 781 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 783 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 784 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 785 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 786 } 787 788 /* transform local matrices if needed */ 789 if (pcbddc->ChangeOfBasisMatrix) { 790 Mat *change_mat_all; 791 IS is_local,is_global; 792 PetscBool sorted; 793 PetscInt *idxs_perm; 794 795 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr); 796 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 797 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 798 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 799 if (!sorted) { 800 const PetscInt *idxs; 801 PetscInt *idxs_sorted,i; 802 803 ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr); 804 ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr); 805 for (i=0;i<pcis->n;i++) { 806 idxs_perm[i] = i; 807 } 808 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 809 ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr); 810 for (i=0;i<pcis->n;i++) { 811 idxs_sorted[i] = idxs[idxs_perm[i]]; 812 } 813 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 814 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 815 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 816 } 817 818 /* get change of basis on the whole set of local dofs */ 819 ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 820 821 if (!sorted) { 822 Mat new_mat; 823 IS is_perm; 824 PetscInt *idxs,i; 825 826 ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr); 827 for (i=0;i<pcis->n;i++) { 828 idxs[idxs_perm[i]] = i; 829 } 830 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 831 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 832 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 833 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 834 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 835 change_mat_all[0] = new_mat; 836 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 837 } 838 839 /* check */ 840 if (pcbddc->dbg_flag) { 841 Vec x,x_change; 842 PetscReal error; 843 844 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 845 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 846 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 847 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 848 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 849 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 850 ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 851 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 852 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 853 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 854 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 855 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 856 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 857 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 858 ierr = VecDestroy(&x);CHKERRQ(ierr); 859 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 860 } 861 862 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 863 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 864 if (isseqaij) { 865 ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 866 } else { 867 Mat work_mat; 868 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 869 ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 870 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 871 } 872 /* 873 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 874 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 875 */ 876 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 877 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 878 } else { 879 /* without change of basis, the local matrix is unchanged */ 880 if (!pcbddc->local_mat) { 881 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 882 pcbddc->local_mat = matis->A; 883 } 884 } 885 886 /* get submatrices */ 887 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 888 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 889 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 890 if (!issbaij) { 891 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 892 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 893 } else { 894 Mat newmat; 895 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 896 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 897 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 898 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 899 } 900 PetscFunctionReturn(0); 901 } 902 903 #undef __FUNCT__ 904 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 905 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 906 { 907 PC_IS* pcis = (PC_IS*)(pc->data); 908 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 909 IS is_aux1,is_aux2; 910 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 911 PetscInt n_vertices,i,j,n_R,n_D,n_B; 912 PetscInt vbs,bs; 913 PetscBT bitmask; 914 PetscErrorCode ierr; 915 916 PetscFunctionBegin; 917 /* 918 No need to setup local scatters if 919 - primal space is unchanged 920 AND 921 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 922 AND 923 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 924 */ 925 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 926 PetscFunctionReturn(0); 927 } 928 /* destroy old objects */ 929 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 930 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 931 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 932 /* Set Non-overlapping dimensions */ 933 n_B = pcis->n_B; n_D = pcis->n - n_B; 934 n_vertices = pcbddc->n_actual_vertices; 935 /* create auxiliary bitmask */ 936 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 937 for (i=0;i<n_vertices;i++) { 938 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 939 } 940 941 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 942 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 943 for (i=0, n_R=0; i<pcis->n; i++) { 944 if (!PetscBTLookup(bitmask,i)) { 945 idx_R_local[n_R] = i; 946 n_R++; 947 } 948 } 949 950 /* Block code */ 951 vbs = 1; 952 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 953 if (bs>1 && !(n_vertices%bs)) { 954 PetscBool is_blocked = PETSC_TRUE; 955 PetscInt *vary; 956 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 957 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 958 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 959 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 960 for (i=0; i<n_vertices; i++) { 961 if (vary[i]!=0 && vary[i]!=bs) { 962 is_blocked = PETSC_FALSE; 963 break; 964 } 965 } 966 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 967 vbs = bs; 968 for (i=0;i<n_R/vbs;i++) { 969 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 970 } 971 } 972 ierr = PetscFree(vary);CHKERRQ(ierr); 973 } 974 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 975 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 976 977 /* print some info if requested */ 978 if (pcbddc->dbg_flag) { 979 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 980 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 981 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 982 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 983 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 984 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 985 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 986 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 987 } 988 989 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 990 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 991 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 992 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 993 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 994 for (i=0; i<n_D; i++) { 995 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 996 } 997 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 998 for (i=0, j=0; i<n_R; i++) { 999 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1000 aux_array1[j++] = i; 1001 } 1002 } 1003 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1004 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1005 for (i=0, j=0; i<n_B; i++) { 1006 if (!PetscBTLookup(bitmask,is_indices[i])) { 1007 aux_array2[j++] = i; 1008 } 1009 } 1010 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1011 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1012 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1013 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1014 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1015 1016 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1017 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1018 for (i=0, j=0; i<n_R; i++) { 1019 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1020 aux_array1[j++] = i; 1021 } 1022 } 1023 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1024 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1025 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1026 } 1027 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1028 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1029 PetscFunctionReturn(0); 1030 } 1031 1032 1033 #undef __FUNCT__ 1034 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1035 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1036 { 1037 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1038 PC_IS *pcis = (PC_IS*)pc->data; 1039 PC pc_temp; 1040 Mat A_RR; 1041 MatReuse reuse; 1042 PetscScalar m_one = -1.0; 1043 PetscReal value; 1044 PetscInt n_D,n_R,ibs,mbs; 1045 PetscBool use_exact,use_exact_reduced,issbaij; 1046 PetscErrorCode ierr; 1047 /* prefixes stuff */ 1048 char dir_prefix[256],neu_prefix[256],str_level[16]; 1049 size_t len; 1050 1051 PetscFunctionBegin; 1052 1053 /* compute prefixes */ 1054 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1055 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1056 if (!pcbddc->current_level) { 1057 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1058 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1059 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1060 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1061 } else { 1062 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1063 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1064 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1065 len -= 15; /* remove "pc_bddc_coarse_" */ 1066 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1067 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1068 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1069 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1070 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1071 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1072 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1073 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1074 } 1075 1076 /* DIRICHLET PROBLEM */ 1077 /* Matrix for Dirichlet problem is pcis->A_II */ 1078 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1079 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1080 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1081 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1082 /* default */ 1083 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1084 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1085 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1086 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1087 if (issbaij) { 1088 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1089 } else { 1090 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1091 } 1092 /* Allow user's customization */ 1093 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1094 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1095 } 1096 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1097 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1098 if (!n_D) { 1099 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1100 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1101 } 1102 /* Set Up KSP for Dirichlet problem of BDDC */ 1103 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1104 /* set ksp_D into pcis data */ 1105 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1106 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1107 pcis->ksp_D = pcbddc->ksp_D; 1108 1109 /* NEUMANN PROBLEM */ 1110 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1111 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1112 if (pcbddc->ksp_R) { /* already created ksp */ 1113 PetscInt nn_R; 1114 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1115 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1116 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1117 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1118 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1119 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1120 reuse = MAT_INITIAL_MATRIX; 1121 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1122 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1123 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1124 reuse = MAT_INITIAL_MATRIX; 1125 } else { /* safe to reuse the matrix */ 1126 reuse = MAT_REUSE_MATRIX; 1127 } 1128 } 1129 /* last check */ 1130 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1131 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1132 reuse = MAT_INITIAL_MATRIX; 1133 } 1134 } else { /* first time, so we need to create the matrix */ 1135 reuse = MAT_INITIAL_MATRIX; 1136 } 1137 /* extract A_RR */ 1138 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1139 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1140 if (ibs != mbs) { 1141 Mat newmat; 1142 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1143 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1144 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1145 } else { 1146 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1147 } 1148 if (!pcbddc->ksp_R) { /* create object if not present */ 1149 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1150 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1151 /* default */ 1152 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1153 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1154 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1155 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1156 if (issbaij) { 1157 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1158 } else { 1159 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1160 } 1161 /* Allow user's customization */ 1162 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1163 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1164 } 1165 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1166 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1167 if (!n_R) { 1168 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1169 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1170 } 1171 /* Set Up KSP for Neumann problem of BDDC */ 1172 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1173 1174 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1175 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1176 /* Dirichlet */ 1177 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1178 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1179 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1180 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1181 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1182 /* need to be adapted? */ 1183 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1184 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1185 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1186 /* print info */ 1187 if (pcbddc->dbg_flag) { 1188 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1189 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1190 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1191 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1192 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1193 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1194 } 1195 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1196 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1197 } 1198 1199 /* Neumann */ 1200 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1201 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1202 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1203 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1204 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1205 /* need to be adapted? */ 1206 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1207 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1208 /* print info */ 1209 if (pcbddc->dbg_flag) { 1210 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1211 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1212 } 1213 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1214 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1215 } 1216 } 1217 /* free Neumann problem's matrix */ 1218 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1224 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1225 { 1226 PetscErrorCode ierr; 1227 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1228 1229 PetscFunctionBegin; 1230 if (applytranspose) { 1231 if (pcbddc->local_auxmat1) { 1232 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1233 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1234 } 1235 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1236 } else { 1237 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1238 if (pcbddc->local_auxmat1) { 1239 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1240 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1241 } 1242 } 1243 PetscFunctionReturn(0); 1244 } 1245 1246 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1249 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1250 { 1251 PetscErrorCode ierr; 1252 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1253 PC_IS* pcis = (PC_IS*) (pc->data); 1254 const PetscScalar zero = 0.0; 1255 1256 PetscFunctionBegin; 1257 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1258 if (applytranspose) { 1259 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1260 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1261 } else { 1262 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1263 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1264 } 1265 /* start communications from local primal nodes to rhs of coarse solver */ 1266 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1267 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 1270 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1271 /* TODO remove null space when doing multilevel */ 1272 if (pcbddc->coarse_ksp) { 1273 if (applytranspose) { 1274 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1275 } else { 1276 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1277 } 1278 } 1279 /* start communications from coarse solver solution to local primal nodes */ 1280 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1281 1282 /* Local solution on R nodes */ 1283 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1284 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1285 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1286 if (pcbddc->switch_static) { 1287 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1288 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1289 } 1290 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1291 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1292 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1293 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1294 if (pcbddc->switch_static) { 1295 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1296 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1297 } 1298 1299 /* complete communications from coarse sol to local primal nodes */ 1300 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1301 1302 /* Sum contributions from two levels */ 1303 if (applytranspose) { 1304 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1305 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1306 } else { 1307 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1308 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1309 } 1310 PetscFunctionReturn(0); 1311 } 1312 1313 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1316 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1317 { 1318 PetscErrorCode ierr; 1319 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1320 PetscScalar *array,*array2; 1321 Vec from,to; 1322 1323 PetscFunctionBegin; 1324 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1325 from = pcbddc->coarse_vec; 1326 to = pcbddc->vec1_P; 1327 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1328 Vec tvec; 1329 PetscInt lsize; 1330 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1331 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1332 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1333 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1334 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1335 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1336 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1337 } 1338 } else { /* from local to global -> put data in coarse right hand side */ 1339 from = pcbddc->vec1_P; 1340 to = pcbddc->coarse_vec; 1341 } 1342 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1348 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1349 { 1350 PetscErrorCode ierr; 1351 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1352 PetscScalar *array,*array2; 1353 Vec from,to; 1354 1355 PetscFunctionBegin; 1356 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1357 from = pcbddc->coarse_vec; 1358 to = pcbddc->vec1_P; 1359 } else { /* from local to global -> put data in coarse right hand side */ 1360 from = pcbddc->vec1_P; 1361 to = pcbddc->coarse_vec; 1362 } 1363 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1364 if (smode == SCATTER_FORWARD) { 1365 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1366 Vec tvec; 1367 PetscInt lsize; 1368 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1369 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1370 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1371 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1372 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1373 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1374 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1375 } 1376 } 1377 PetscFunctionReturn(0); 1378 } 1379 1380 /* uncomment for testing purposes */ 1381 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1382 #undef __FUNCT__ 1383 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1384 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1385 { 1386 PetscErrorCode ierr; 1387 PC_IS* pcis = (PC_IS*)(pc->data); 1388 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1389 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1390 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1391 MatType impMatType=MATSEQAIJ; 1392 /* one and zero */ 1393 PetscScalar one=1.0,zero=0.0; 1394 /* space to store constraints and their local indices */ 1395 PetscScalar *temp_quadrature_constraint; 1396 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1397 /* iterators */ 1398 PetscInt i,j,k,total_counts,temp_start_ptr; 1399 /* stuff to store connected components stored in pcbddc->mat_graph */ 1400 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1401 PetscInt n_ISForFaces,n_ISForEdges; 1402 /* near null space stuff */ 1403 MatNullSpace nearnullsp; 1404 const Vec *nearnullvecs; 1405 Vec *localnearnullsp; 1406 PetscBool nnsp_has_cnst; 1407 PetscInt nnsp_size; 1408 PetscScalar *array; 1409 /* BLAS integers */ 1410 PetscBLASInt lwork,lierr; 1411 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1412 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1413 /* LAPACK working arrays for SVD or POD */ 1414 PetscBool skip_lapack; 1415 PetscScalar *work; 1416 PetscReal *singular_vals; 1417 #if defined(PETSC_USE_COMPLEX) 1418 PetscReal *rwork; 1419 #endif 1420 #if defined(PETSC_MISSING_LAPACK_GESVD) 1421 PetscBLASInt Blas_one_2=1; 1422 PetscScalar *temp_basis,*correlation_mat; 1423 #else 1424 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1425 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1426 #endif 1427 /* reuse */ 1428 PetscInt olocal_primal_size; 1429 PetscInt *oprimal_indices_local_idxs; 1430 /* change of basis */ 1431 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1432 PetscBool boolforchange,qr_needed; 1433 PetscBT touched,change_basis,qr_needed_idx; 1434 /* auxiliary stuff */ 1435 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1436 /* some quantities */ 1437 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1438 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1439 1440 1441 PetscFunctionBegin; 1442 /* Destroy Mat objects computed previously */ 1443 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1444 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1445 /* Get index sets for faces, edges and vertices from graph */ 1446 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1447 pcbddc->use_vertices = PETSC_TRUE; 1448 } 1449 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1450 /* HACKS (the two following code branches) */ 1451 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1452 pcbddc->use_change_of_basis = PETSC_TRUE; 1453 pcbddc->use_change_on_faces = PETSC_FALSE; 1454 } 1455 if (pcbddc->NullSpace) { 1456 /* use_change_of_basis should be consistent among processors */ 1457 PetscBool tbool = pcbddc->use_change_of_basis; 1458 ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1459 } 1460 /* print some info */ 1461 if (pcbddc->dbg_flag) { 1462 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1463 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1464 i = 0; 1465 if (ISForVertices) { 1466 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1467 } 1468 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1469 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1470 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1471 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1472 } 1473 /* check if near null space is attached to global mat */ 1474 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1475 if (nearnullsp) { 1476 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1477 /* remove any stored info */ 1478 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1479 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1480 /* store information for BDDC solver reuse */ 1481 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1482 pcbddc->onearnullspace = nearnullsp; 1483 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1484 for (i=0;i<nnsp_size;i++) { 1485 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1486 } 1487 } else { /* if near null space is not provided BDDC uses constants by default */ 1488 nnsp_size = 0; 1489 nnsp_has_cnst = PETSC_TRUE; 1490 } 1491 /* get max number of constraints on a single cc */ 1492 max_constraints = nnsp_size; 1493 if (nnsp_has_cnst) max_constraints++; 1494 1495 /* 1496 Evaluate maximum storage size needed by the procedure 1497 - temp_indices will contain start index of each constraint stored as follows 1498 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1499 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1500 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1501 */ 1502 total_counts = n_ISForFaces+n_ISForEdges; 1503 total_counts *= max_constraints; 1504 n_vertices = 0; 1505 if (ISForVertices) { 1506 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1507 } 1508 total_counts += n_vertices; 1509 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1510 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1511 total_counts = 0; 1512 max_size_of_constraint = 0; 1513 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1514 if (i<n_ISForEdges) { 1515 used_IS = &ISForEdges[i]; 1516 } else { 1517 used_IS = &ISForFaces[i-n_ISForEdges]; 1518 } 1519 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1520 total_counts += j; 1521 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1522 } 1523 total_counts *= max_constraints; 1524 total_counts += n_vertices; 1525 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1526 /* get local part of global near null space vectors */ 1527 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1528 for (k=0;k<nnsp_size;k++) { 1529 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1530 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1531 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1532 } 1533 1534 /* whether or not to skip lapack calls */ 1535 skip_lapack = PETSC_TRUE; 1536 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1537 1538 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1539 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1540 PetscScalar temp_work; 1541 #if defined(PETSC_MISSING_LAPACK_GESVD) 1542 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1543 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1544 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1545 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1546 #if defined(PETSC_USE_COMPLEX) 1547 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1548 #endif 1549 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1550 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1551 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1552 lwork = -1; 1553 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1554 #if !defined(PETSC_USE_COMPLEX) 1555 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1556 #else 1557 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1558 #endif 1559 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1560 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1561 #else /* on missing GESVD */ 1562 /* SVD */ 1563 PetscInt max_n,min_n; 1564 max_n = max_size_of_constraint; 1565 min_n = max_constraints; 1566 if (max_size_of_constraint < max_constraints) { 1567 min_n = max_size_of_constraint; 1568 max_n = max_constraints; 1569 } 1570 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1571 #if defined(PETSC_USE_COMPLEX) 1572 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1573 #endif 1574 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1575 lwork = -1; 1576 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1577 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1578 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1579 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1580 #if !defined(PETSC_USE_COMPLEX) 1581 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1582 #else 1583 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1584 #endif 1585 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1586 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1587 #endif /* on missing GESVD */ 1588 /* Allocate optimal workspace */ 1589 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1590 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1591 } 1592 /* Now we can loop on constraining sets */ 1593 total_counts = 0; 1594 temp_indices[0] = 0; 1595 /* vertices */ 1596 if (ISForVertices) { 1597 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1598 if (nnsp_has_cnst) { /* consider all vertices */ 1599 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1600 for (i=0;i<n_vertices;i++) { 1601 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1602 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1603 total_counts++; 1604 } 1605 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1606 PetscBool used_vertex; 1607 for (i=0;i<n_vertices;i++) { 1608 used_vertex = PETSC_FALSE; 1609 k = 0; 1610 while (!used_vertex && k<nnsp_size) { 1611 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1612 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1613 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1614 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1615 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1616 total_counts++; 1617 used_vertex = PETSC_TRUE; 1618 } 1619 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1620 k++; 1621 } 1622 } 1623 } 1624 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1625 n_vertices = total_counts; 1626 } 1627 1628 /* edges and faces */ 1629 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1630 if (i<n_ISForEdges) { 1631 used_IS = &ISForEdges[i]; 1632 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1633 } else { 1634 used_IS = &ISForFaces[i-n_ISForEdges]; 1635 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1636 } 1637 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1638 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1639 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1640 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1641 /* change of basis should not be performed on local periodic nodes */ 1642 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1643 if (nnsp_has_cnst) { 1644 PetscScalar quad_value; 1645 temp_constraints++; 1646 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1647 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1648 for (j=0;j<size_of_constraint;j++) { 1649 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1650 } 1651 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1652 total_counts++; 1653 } 1654 for (k=0;k<nnsp_size;k++) { 1655 PetscReal real_value; 1656 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1657 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1658 for (j=0;j<size_of_constraint;j++) { 1659 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1660 } 1661 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1662 /* check if array is null on the connected component */ 1663 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1664 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1665 if (real_value > 0.0) { /* keep indices and values */ 1666 temp_constraints++; 1667 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1668 total_counts++; 1669 } 1670 } 1671 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1672 valid_constraints = temp_constraints; 1673 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1674 if (!pcbddc->use_nnsp_true && temp_constraints) { 1675 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1676 1677 #if defined(PETSC_MISSING_LAPACK_GESVD) 1678 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1679 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1680 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1681 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1682 from that computed using LAPACKgesvd 1683 -> This is due to a different computation of eigenvectors in LAPACKheev 1684 -> The quality of the POD-computed basis will be the same */ 1685 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1686 /* Store upper triangular part of correlation matrix */ 1687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1688 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1689 for (j=0;j<temp_constraints;j++) { 1690 for (k=0;k<j+1;k++) { 1691 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1692 } 1693 } 1694 /* compute eigenvalues and eigenvectors of correlation matrix */ 1695 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1696 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1697 #if !defined(PETSC_USE_COMPLEX) 1698 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1699 #else 1700 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1701 #endif 1702 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1703 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1704 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1705 j=0; 1706 while (j < temp_constraints && singular_vals[j] < tol) j++; 1707 total_counts=total_counts-j; 1708 valid_constraints = temp_constraints-j; 1709 /* scale and copy POD basis into used quadrature memory */ 1710 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1711 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1712 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1713 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1714 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1715 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1716 if (j<temp_constraints) { 1717 PetscInt ii; 1718 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1719 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1720 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1721 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1722 for (k=0;k<temp_constraints-j;k++) { 1723 for (ii=0;ii<size_of_constraint;ii++) { 1724 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1725 } 1726 } 1727 } 1728 #else /* on missing GESVD */ 1729 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1730 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1731 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1732 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1733 #if !defined(PETSC_USE_COMPLEX) 1734 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1735 #else 1736 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1737 #endif 1738 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1739 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1740 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1741 k = temp_constraints; 1742 if (k > size_of_constraint) k = size_of_constraint; 1743 j = 0; 1744 while (j < k && singular_vals[k-j-1] < tol) j++; 1745 total_counts = total_counts-temp_constraints+k-j; 1746 valid_constraints = k-j; 1747 #endif /* on missing GESVD */ 1748 } 1749 /* setting change_of_basis flag is safe now */ 1750 if (boolforchange) { 1751 for (j=0;j<valid_constraints;j++) { 1752 PetscBTSet(change_basis,total_counts-j-1); 1753 } 1754 } 1755 } 1756 /* free index sets of faces, edges and vertices */ 1757 for (i=0;i<n_ISForFaces;i++) { 1758 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1759 } 1760 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1761 for (i=0;i<n_ISForEdges;i++) { 1762 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1763 } 1764 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1765 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1766 /* map temp_indices_to_constraint in boundary numbering */ 1767 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1768 if (i != temp_indices[total_counts]) { 1769 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1770 } 1771 1772 /* free workspace */ 1773 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1774 ierr = PetscFree(work);CHKERRQ(ierr); 1775 #if defined(PETSC_USE_COMPLEX) 1776 ierr = PetscFree(rwork);CHKERRQ(ierr); 1777 #endif 1778 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1779 #if defined(PETSC_MISSING_LAPACK_GESVD) 1780 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1781 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1782 #endif 1783 } 1784 for (k=0;k<nnsp_size;k++) { 1785 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1786 } 1787 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1788 1789 /* set quantities in pcbddc data structure and store previous primal size */ 1790 /* n_vertices defines the number of subdomain corners in the primal space */ 1791 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1792 olocal_primal_size = pcbddc->local_primal_size; 1793 pcbddc->local_primal_size = total_counts; 1794 pcbddc->n_vertices = n_vertices; 1795 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1796 1797 /* Create constraint matrix */ 1798 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1799 /* so we need to set it up properly either with or without change of basis */ 1800 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1801 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1802 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1803 /* array to compute a local numbering of constraints : vertices first then constraints */ 1804 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1805 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1806 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1807 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1808 /* auxiliary stuff for basis change */ 1809 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1810 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1811 1812 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1813 total_primal_vertices=0; 1814 for (i=0;i<pcbddc->local_primal_size;i++) { 1815 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1816 if (size_of_constraint == 1) { 1817 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1818 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1819 aux_primal_minloc[total_primal_vertices]=0; 1820 total_primal_vertices++; 1821 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1822 PetscInt min_loc,min_index; 1823 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1824 /* find first untouched local node */ 1825 k = 0; 1826 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1827 min_index = global_indices[k]; 1828 min_loc = k; 1829 /* search the minimum among global nodes already untouched on the cc */ 1830 for (k=1;k<size_of_constraint;k++) { 1831 /* there can be more than one constraint on a single connected component */ 1832 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1833 min_index = global_indices[k]; 1834 min_loc = k; 1835 } 1836 } 1837 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1838 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1839 aux_primal_minloc[total_primal_vertices]=min_loc; 1840 total_primal_vertices++; 1841 } 1842 } 1843 /* determine if a QR strategy is needed for change of basis */ 1844 qr_needed = PETSC_FALSE; 1845 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1846 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1847 if (PetscBTLookup(change_basis,i)) { 1848 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1849 j = 0; 1850 for (k=0;k<size_of_constraint;k++) { 1851 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1852 j++; 1853 } 1854 } 1855 /* found more than one primal dof on the cc */ 1856 if (j > 1) { 1857 PetscBTSet(qr_needed_idx,i); 1858 qr_needed = PETSC_TRUE; 1859 } 1860 } 1861 } 1862 /* free workspace */ 1863 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1864 1865 /* permute indices in order to have a sorted set of vertices */ 1866 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1867 1868 /* nonzero structure of constraint matrix */ 1869 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1870 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1871 j=total_primal_vertices; 1872 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1873 if (!PetscBTLookup(change_basis,i)) { 1874 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1875 j++; 1876 } 1877 } 1878 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1879 ierr = PetscFree(nnz);CHKERRQ(ierr); 1880 /* set values in constraint matrix */ 1881 for (i=0;i<total_primal_vertices;i++) { 1882 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1883 } 1884 total_counts = total_primal_vertices; 1885 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1886 if (!PetscBTLookup(change_basis,i)) { 1887 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1888 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1889 total_counts++; 1890 } 1891 } 1892 /* assembling */ 1893 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1894 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1895 /* 1896 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1897 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1898 */ 1899 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1900 if (pcbddc->use_change_of_basis) { 1901 /* dual and primal dofs on a single cc */ 1902 PetscInt dual_dofs,primal_dofs; 1903 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1904 PetscInt primal_counter; 1905 /* working stuff for GEQRF */ 1906 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1907 PetscBLASInt lqr_work; 1908 /* working stuff for UNGQR */ 1909 PetscScalar *gqr_work,lgqr_work_t; 1910 PetscBLASInt lgqr_work; 1911 /* working stuff for TRTRS */ 1912 PetscScalar *trs_rhs; 1913 PetscBLASInt Blas_NRHS; 1914 /* pointers for values insertion into change of basis matrix */ 1915 PetscInt *start_rows,*start_cols; 1916 PetscScalar *start_vals; 1917 /* working stuff for values insertion */ 1918 PetscBT is_primal; 1919 /* matrix sizes */ 1920 PetscInt global_size,local_size; 1921 /* work array for nonzeros */ 1922 PetscScalar *nnz_array; 1923 /* temporary change of basis */ 1924 Mat localChangeOfBasisMatrix; 1925 /* auxiliary work for global change of basis */ 1926 Vec nnz_vec; 1927 PetscInt *idxs_I,*idxs_B,*idxs_all; 1928 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 1929 PetscScalar *vals; 1930 PetscBool done; 1931 1932 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 1933 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 1934 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1935 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1936 1937 /* nonzeros for local mat */ 1938 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1939 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1940 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1941 if (PetscBTLookup(change_basis,i)) { 1942 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1943 if (PetscBTLookup(qr_needed_idx,i)) { 1944 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1945 } else { 1946 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1947 /* get local primal index on the cc */ 1948 j = 0; 1949 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1950 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1951 } 1952 } 1953 } 1954 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1955 /* Set initial identity in the matrix */ 1956 for (i=0;i<pcis->n_B;i++) { 1957 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1958 } 1959 1960 if (pcbddc->dbg_flag) { 1961 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1962 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1963 } 1964 1965 1966 /* Now we loop on the constraints which need a change of basis */ 1967 /* 1968 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1969 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1970 1971 Basic blocks of change of basis matrix T computed 1972 1973 - Using the following block transformation if there is only a primal dof on the cc 1974 (in the example, primal dof is the last one of the edge in LOCAL ordering 1975 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1976 | 1 0 ... 0 1 | 1977 | 0 1 ... 0 1 | 1978 | ... | 1979 | 0 ... 1 1 | 1980 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1981 1982 - via QR decomposition of constraints otherwise 1983 */ 1984 if (qr_needed) { 1985 /* space to store Q */ 1986 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 1987 /* first we issue queries for optimal work */ 1988 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1989 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1990 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1991 lqr_work = -1; 1992 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1993 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1994 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1995 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 1996 lgqr_work = -1; 1997 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1998 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1999 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2000 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2001 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2002 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2003 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2004 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2005 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2006 /* array to store scaling factors for reflectors */ 2007 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2008 /* array to store rhs and solution of triangular solver */ 2009 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2010 /* allocating workspace for check */ 2011 if (pcbddc->dbg_flag) { 2012 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2013 } 2014 } 2015 /* array to store whether a node is primal or not */ 2016 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2017 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2018 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2019 if (i != total_primal_vertices) { 2020 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2021 } 2022 for (i=0;i<total_primal_vertices;i++) { 2023 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2024 } 2025 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2026 2027 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2028 /* -> using implicit ordering contained in temp_indices data */ 2029 total_counts = pcbddc->n_vertices; 2030 primal_counter = total_counts; 2031 while (total_counts<pcbddc->local_primal_size) { 2032 primal_dofs = 1; 2033 if (PetscBTLookup(change_basis,total_counts)) { 2034 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2035 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2036 primal_dofs++; 2037 } 2038 /* get constraint info */ 2039 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2040 dual_dofs = size_of_constraint-primal_dofs; 2041 2042 if (pcbddc->dbg_flag) { 2043 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2044 } 2045 2046 if (primal_dofs > 1) { /* QR */ 2047 2048 /* copy quadrature constraints for change of basis check */ 2049 if (pcbddc->dbg_flag) { 2050 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2051 } 2052 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2053 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2054 2055 /* compute QR decomposition of constraints */ 2056 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2057 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2058 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2059 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2060 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2061 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2062 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2063 2064 /* explictly compute R^-T */ 2065 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2066 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2067 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2068 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2069 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2070 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2071 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2072 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2073 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2074 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2075 2076 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2078 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2079 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2080 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2081 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2082 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2083 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2084 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2085 2086 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2087 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2088 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2090 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2091 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2092 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2093 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2094 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2095 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2096 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2097 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2098 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2099 2100 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2101 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2102 /* insert cols for primal dofs */ 2103 for (j=0;j<primal_dofs;j++) { 2104 start_vals = &qr_basis[j*size_of_constraint]; 2105 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2106 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2107 } 2108 /* insert cols for dual dofs */ 2109 for (j=0,k=0;j<dual_dofs;k++) { 2110 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2111 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2112 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2113 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2114 j++; 2115 } 2116 } 2117 2118 /* check change of basis */ 2119 if (pcbddc->dbg_flag) { 2120 PetscInt ii,jj; 2121 PetscBool valid_qr=PETSC_TRUE; 2122 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2124 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2126 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2127 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2128 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2129 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2130 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2131 for (jj=0;jj<size_of_constraint;jj++) { 2132 for (ii=0;ii<primal_dofs;ii++) { 2133 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2134 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2135 } 2136 } 2137 if (!valid_qr) { 2138 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2139 for (jj=0;jj<size_of_constraint;jj++) { 2140 for (ii=0;ii<primal_dofs;ii++) { 2141 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2142 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2143 } 2144 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2145 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2146 } 2147 } 2148 } 2149 } else { 2150 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2151 } 2152 } 2153 } else { /* simple transformation block */ 2154 PetscInt row,col; 2155 PetscScalar val; 2156 for (j=0;j<size_of_constraint;j++) { 2157 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2158 if (!PetscBTLookup(is_primal,row)) { 2159 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2160 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2161 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2162 } else { 2163 for (k=0;k<size_of_constraint;k++) { 2164 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2165 if (row != col) { 2166 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2167 } else { 2168 val = 1.0; 2169 } 2170 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2171 } 2172 } 2173 } 2174 if (pcbddc->dbg_flag) { 2175 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2176 } 2177 } 2178 /* increment primal counter */ 2179 primal_counter += primal_dofs; 2180 } else { 2181 if (pcbddc->dbg_flag) { 2182 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2183 } 2184 } 2185 /* increment constraint counter total_counts */ 2186 total_counts += primal_dofs; 2187 } 2188 2189 /* free workspace */ 2190 if (qr_needed) { 2191 if (pcbddc->dbg_flag) { 2192 ierr = PetscFree(work);CHKERRQ(ierr); 2193 } 2194 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2195 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2196 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2197 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2198 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2199 } 2200 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2201 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2202 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2203 2204 /* assembling of global change of variable */ 2205 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2206 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2207 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2208 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2209 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2210 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2211 2212 /* nonzeros (overestimated) */ 2213 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2214 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2215 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2216 for (i=0;i<pcis->n;i++) { 2217 nnz_array[i] = 1.0; 2218 idxs_all[i] = i; 2219 } 2220 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2221 for (i=0;i<pcis->n_B;i++) { 2222 nnz_array[idxs_B[i]] = nnz[i]; 2223 } 2224 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2225 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2226 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2227 ierr = PetscFree(nnz);CHKERRQ(ierr); 2228 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2229 ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr); 2230 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2231 for (i=0;i<local_size;i++) { 2232 nnz[i] = (PetscInt)(PetscRealPart(nnz_array[i])); 2233 } 2234 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2235 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2236 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz,0,nnz);CHKERRQ(ierr); 2237 ierr = PetscFree(nnz);CHKERRQ(ierr); 2238 2239 /* Set identity on dirichlet dofs */ 2240 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2241 for (i=0;i<pcis->n-pcis->n_B;i++) { 2242 PetscScalar one=1.0; 2243 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2244 } 2245 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2246 2247 /* Set values at interface dofs */ 2248 done = PETSC_TRUE; 2249 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2250 if (!done) { 2251 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2252 } 2253 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2254 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2255 ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2256 for (i=0;i<nvtxs;i++) { 2257 PetscInt row,*cols,ncols; 2258 PetscScalar *mat_vals; 2259 2260 row = idxs_B[i]; 2261 ncols = xadj[i+1]-xadj[i]; 2262 cols = idxs_mapped+xadj[i]; 2263 mat_vals = vals+xadj[i]; 2264 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2265 } 2266 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2267 if (!done) { 2268 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2269 } 2270 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2271 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2272 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2273 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2274 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2275 2276 /* check */ 2277 if (pcbddc->dbg_flag) { 2278 PetscReal error; 2279 Vec x,x_change; 2280 2281 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2282 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2283 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2284 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2285 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2286 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2287 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2288 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2289 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2290 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2291 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2292 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2293 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2294 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2295 ierr = VecDestroy(&x);CHKERRQ(ierr); 2296 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2297 } 2298 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2299 2300 /* 2301 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2302 ierr = MatView(local_ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2303 */ 2304 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2305 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2306 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2307 } 2308 2309 /* set up change of basis context */ 2310 if (pcbddc->ChangeOfBasisMatrix) { 2311 PCBDDCChange_ctx change_ctx; 2312 2313 if (!pcbddc->new_global_mat) { 2314 PetscInt global_size,local_size; 2315 2316 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2317 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2318 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2319 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2320 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2321 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2322 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2323 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2324 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2325 } else { 2326 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2327 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2328 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2329 } 2330 if (!pcbddc->user_ChangeOfBasisMatrix) { 2331 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2332 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2333 } else { 2334 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2335 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2336 } 2337 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2338 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2339 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2340 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2341 } 2342 2343 /* get indices in local ordering for vertices and constraints */ 2344 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2345 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2346 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2347 } 2348 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2349 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2350 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2351 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2352 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2353 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2354 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2355 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2356 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2357 /* set quantities in PCBDDC data struct */ 2358 pcbddc->n_actual_vertices = i; 2359 /* check if a new primal space has been introduced */ 2360 pcbddc->new_primal_space_local = PETSC_TRUE; 2361 if (olocal_primal_size == pcbddc->local_primal_size) { 2362 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2363 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2364 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2365 } 2366 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2367 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2368 2369 /* flush dbg viewer */ 2370 if (pcbddc->dbg_flag) { 2371 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2372 } 2373 2374 /* free workspace */ 2375 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2376 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2377 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2378 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2379 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2380 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2386 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2387 { 2388 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2389 PC_IS *pcis = (PC_IS*)pc->data; 2390 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2391 PetscInt ierr,i,vertex_size; 2392 PetscViewer viewer=pcbddc->dbg_viewer; 2393 2394 PetscFunctionBegin; 2395 /* Reset previously computed graph */ 2396 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2397 /* Init local Graph struct */ 2398 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2399 2400 /* Check validity of the csr graph passed in by the user */ 2401 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2402 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2403 } 2404 2405 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2406 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2407 Mat mat_adj; 2408 const PetscInt *xadj,*adjncy; 2409 PetscBool flg_row=PETSC_TRUE; 2410 2411 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2412 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2413 if (!flg_row) { 2414 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2415 } 2416 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2417 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2418 if (!flg_row) { 2419 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2420 } 2421 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2422 } 2423 2424 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2425 vertex_size = 1; 2426 if (pcbddc->user_provided_isfordofs) { 2427 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2428 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2429 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2430 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2431 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2432 } 2433 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2434 pcbddc->n_ISForDofs = 0; 2435 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2436 } 2437 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2438 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2439 } else { 2440 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2441 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2442 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2443 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2444 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2445 } 2446 } 2447 } 2448 2449 /* Setup of Graph */ 2450 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2451 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2452 } 2453 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2454 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2455 } 2456 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2457 2458 /* Graph's connected components analysis */ 2459 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2460 2461 /* print some info to stdout */ 2462 if (pcbddc->dbg_flag) { 2463 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2464 } 2465 2466 /* mark topography has done */ 2467 pcbddc->recompute_topography = PETSC_FALSE; 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2473 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2474 { 2475 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2476 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 n = 0; 2481 vertices = 0; 2482 if (pcbddc->ConstraintMatrix) { 2483 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2484 for (i=0;i<local_primal_size;i++) { 2485 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2486 if (size_of_constraint == 1) n++; 2487 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2488 } 2489 if (vertices_idx) { 2490 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2491 n = 0; 2492 for (i=0;i<local_primal_size;i++) { 2493 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2494 if (size_of_constraint == 1) { 2495 vertices[n++]=row_cmat_indices[0]; 2496 } 2497 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2498 } 2499 } 2500 } 2501 *n_vertices = n; 2502 if (vertices_idx) *vertices_idx = vertices; 2503 PetscFunctionReturn(0); 2504 } 2505 2506 #undef __FUNCT__ 2507 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2508 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2509 { 2510 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2511 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2512 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2513 PetscBT touched; 2514 PetscErrorCode ierr; 2515 2516 /* This function assumes that the number of local constraints per connected component 2517 is not greater than the number of nodes defined for the connected component 2518 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2519 PetscFunctionBegin; 2520 n = 0; 2521 constraints_index = 0; 2522 if (pcbddc->ConstraintMatrix) { 2523 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2524 max_size_of_constraint = 0; 2525 for (i=0;i<local_primal_size;i++) { 2526 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2527 if (size_of_constraint > 1) { 2528 n++; 2529 } 2530 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2531 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2532 } 2533 if (constraints_idx) { 2534 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2535 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2536 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2537 n = 0; 2538 for (i=0;i<local_primal_size;i++) { 2539 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2540 if (size_of_constraint > 1) { 2541 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2542 /* find first untouched local node */ 2543 j = 0; 2544 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2545 min_index = row_cmat_global_indices[j]; 2546 min_loc = j; 2547 /* search the minimum among nodes not yet touched on the connected component 2548 since there can be more than one constraint on a single cc */ 2549 for (j=1;j<size_of_constraint;j++) { 2550 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2551 min_index = row_cmat_global_indices[j]; 2552 min_loc = j; 2553 } 2554 } 2555 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2556 constraints_index[n++] = row_cmat_indices[min_loc]; 2557 } 2558 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2559 } 2560 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2561 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2562 } 2563 } 2564 *n_constraints = n; 2565 if (constraints_idx) *constraints_idx = constraints_index; 2566 PetscFunctionReturn(0); 2567 } 2568 2569 /* the next two functions has been adapted from pcis.c */ 2570 #undef __FUNCT__ 2571 #define __FUNCT__ "PCBDDCApplySchur" 2572 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2573 { 2574 PetscErrorCode ierr; 2575 PC_IS *pcis = (PC_IS*)(pc->data); 2576 2577 PetscFunctionBegin; 2578 if (!vec2_B) { vec2_B = v; } 2579 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2580 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2581 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2582 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2583 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2584 PetscFunctionReturn(0); 2585 } 2586 2587 #undef __FUNCT__ 2588 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2589 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2590 { 2591 PetscErrorCode ierr; 2592 PC_IS *pcis = (PC_IS*)(pc->data); 2593 2594 PetscFunctionBegin; 2595 if (!vec2_B) { vec2_B = v; } 2596 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2597 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2598 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2599 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2600 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2601 PetscFunctionReturn(0); 2602 } 2603 2604 #undef __FUNCT__ 2605 #define __FUNCT__ "PCBDDCSubsetNumbering" 2606 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2607 { 2608 Vec local_vec,global_vec; 2609 IS seqis,paris; 2610 VecScatter scatter_ctx; 2611 PetscScalar *array; 2612 PetscInt *temp_global_dofs; 2613 PetscScalar globalsum; 2614 PetscInt i,j,s; 2615 PetscInt nlocals,first_index,old_index,max_local; 2616 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2617 PetscMPIInt *dof_sizes,*dof_displs; 2618 PetscBool first_found; 2619 PetscErrorCode ierr; 2620 2621 PetscFunctionBegin; 2622 /* mpi buffers */ 2623 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2624 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2625 j = ( !rank_prec_comm ? size_prec_comm : 0); 2626 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2627 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2628 /* get maximum size of subset */ 2629 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2630 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2631 max_local = 0; 2632 for (i=0;i<n_local_dofs;i++) { 2633 if (max_local < temp_global_dofs[i] ) { 2634 max_local = temp_global_dofs[i]; 2635 } 2636 } 2637 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2638 max_global++; 2639 max_local = 0; 2640 for (i=0;i<n_local_dofs;i++) { 2641 if (max_local < local_dofs[i] ) { 2642 max_local = local_dofs[i]; 2643 } 2644 } 2645 max_local++; 2646 /* allocate workspace */ 2647 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2648 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2649 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2650 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2651 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2652 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2653 /* create scatter */ 2654 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2655 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2656 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2657 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2658 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2659 /* init array */ 2660 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2661 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2662 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2663 if (local_dofs_mult) { 2664 for (i=0;i<n_local_dofs;i++) { 2665 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2666 } 2667 } else { 2668 for (i=0;i<n_local_dofs;i++) { 2669 array[local_dofs[i]]=1.0; 2670 } 2671 } 2672 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2673 /* scatter into global vec and get total number of global dofs */ 2674 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2675 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2676 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2677 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2678 /* Fill global_vec with cumulative function for global numbering */ 2679 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2680 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2681 nlocals = 0; 2682 first_index = -1; 2683 first_found = PETSC_FALSE; 2684 for (i=0;i<s;i++) { 2685 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2686 first_found = PETSC_TRUE; 2687 first_index = i; 2688 } 2689 nlocals += (PetscInt)PetscRealPart(array[i]); 2690 } 2691 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2692 if (!rank_prec_comm) { 2693 dof_displs[0]=0; 2694 for (i=1;i<size_prec_comm;i++) { 2695 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2696 } 2697 } 2698 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2699 if (first_found) { 2700 array[first_index] += (PetscScalar)nlocals; 2701 old_index = first_index; 2702 for (i=first_index+1;i<s;i++) { 2703 if (PetscRealPart(array[i]) > 0.1) { 2704 array[i] += array[old_index]; 2705 old_index = i; 2706 } 2707 } 2708 } 2709 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2710 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2711 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2712 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2713 /* get global ordering of local dofs */ 2714 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2715 if (local_dofs_mult) { 2716 for (i=0;i<n_local_dofs;i++) { 2717 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2718 } 2719 } else { 2720 for (i=0;i<n_local_dofs;i++) { 2721 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2722 } 2723 } 2724 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2725 /* free workspace */ 2726 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2727 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2728 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2729 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2730 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2731 /* return pointer to global ordering of local dofs */ 2732 *global_numbering_subset = temp_global_dofs; 2733 PetscFunctionReturn(0); 2734 } 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2738 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2739 { 2740 PetscInt i,j; 2741 PetscScalar *alphas; 2742 PetscErrorCode ierr; 2743 2744 PetscFunctionBegin; 2745 /* this implements stabilized Gram-Schmidt */ 2746 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2747 for (i=0;i<n;i++) { 2748 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2749 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2750 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2751 } 2752 ierr = PetscFree(alphas);CHKERRQ(ierr); 2753 PetscFunctionReturn(0); 2754 } 2755 2756 /* TODO 2757 - now preallocation is done assuming SEQDENSE local matrices 2758 */ 2759 #undef __FUNCT__ 2760 #define __FUNCT__ "MatISGetMPIXAIJ" 2761 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2762 { 2763 Mat new_mat; 2764 Mat_IS *matis = (Mat_IS*)(mat->data); 2765 /* info on mat */ 2766 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2767 PetscInt bs,rows,cols; 2768 PetscInt lrows,lcols; 2769 PetscInt local_rows,local_cols; 2770 PetscBool isdense; 2771 /* values insertion */ 2772 PetscScalar *array; 2773 PetscInt *local_indices,*global_indices; 2774 /* work */ 2775 PetscInt i,j,index_row; 2776 PetscErrorCode ierr; 2777 2778 PetscFunctionBegin; 2779 /* MISSING CHECKS 2780 - rectangular case not covered (it is not allowed by MATIS) 2781 */ 2782 /* get info from mat */ 2783 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2784 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2785 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2786 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2787 2788 /* work */ 2789 ierr = PetscMalloc1(local_rows,&local_indices);CHKERRQ(ierr); 2790 for (i=0;i<local_rows;i++) local_indices[i]=i; 2791 /* map indices of local mat to global values */ 2792 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2793 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2794 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2795 2796 if (reuse==MAT_INITIAL_MATRIX) { 2797 Vec vec_dnz,vec_onz; 2798 PetscScalar *my_dnz,*my_onz; 2799 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2800 PetscInt index_col,owner; 2801 PetscMPIInt nsubdomains; 2802 2803 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2804 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2805 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2806 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2807 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2808 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2809 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2810 2811 /* 2812 preallocation 2813 */ 2814 2815 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2816 /* 2817 Some vectors are needed to sum up properly on shared interface dofs. 2818 Preallocation macros cannot do the job. 2819 Note that preallocation is not exact, since it overestimates nonzeros 2820 */ 2821 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2822 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2823 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2824 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2825 /* All processes need to compute entire row ownership */ 2826 ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr); 2827 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2828 for (i=0;i<nsubdomains;i++) { 2829 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2830 row_ownership[j]=i; 2831 } 2832 } 2833 2834 /* 2835 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2836 then, they will be summed up properly. This way, preallocation is always sufficient 2837 */ 2838 ierr = PetscMalloc1(local_rows,&my_dnz);CHKERRQ(ierr); 2839 ierr = PetscMalloc1(local_rows,&my_onz);CHKERRQ(ierr); 2840 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2841 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2842 for (i=0;i<local_rows;i++) { 2843 index_row = global_indices[i]; 2844 for (j=i;j<local_rows;j++) { 2845 owner = row_ownership[index_row]; 2846 index_col = global_indices[j]; 2847 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2848 my_dnz[i] += 1.0; 2849 } else { /* offdiag block */ 2850 my_onz[i] += 1.0; 2851 } 2852 /* same as before, interchanging rows and cols */ 2853 if (i != j) { 2854 owner = row_ownership[index_col]; 2855 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2856 my_dnz[j] += 1.0; 2857 } else { 2858 my_onz[j] += 1.0; 2859 } 2860 } 2861 } 2862 } 2863 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2864 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2865 if (local_rows) { /* multilevel guard */ 2866 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2867 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2868 } 2869 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2870 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2871 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2872 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2873 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2874 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2875 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2876 2877 /* set computed preallocation in dnz and onz */ 2878 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2879 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2880 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2881 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2882 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2883 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2884 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2885 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2886 2887 /* Resize preallocation if overestimated */ 2888 for (i=0;i<lrows;i++) { 2889 dnz[i] = PetscMin(dnz[i],lcols); 2890 onz[i] = PetscMin(onz[i],cols-lcols); 2891 } 2892 /* set preallocation */ 2893 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2894 for (i=0;i<lrows/bs;i++) { 2895 dnz[i] = dnz[i*bs]/bs; 2896 onz[i] = onz[i*bs]/bs; 2897 } 2898 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2899 for (i=0;i<lrows/bs;i++) { 2900 dnz[i] = dnz[i]-i; 2901 } 2902 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2903 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2904 *M = new_mat; 2905 } else { 2906 PetscInt mbs,mrows,mcols; 2907 /* some checks */ 2908 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2909 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2910 if (mrows != rows) { 2911 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2912 } 2913 if (mrows != rows) { 2914 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2915 } 2916 if (mbs != bs) { 2917 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2918 } 2919 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2920 } 2921 /* set local to global mappings */ 2922 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2923 /* Set values */ 2924 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2925 if (isdense) { /* special case for dense local matrices */ 2926 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2927 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2928 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2929 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2930 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2931 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2932 } else { /* very basic values insertion for all other matrix types */ 2933 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2934 for (i=0;i<local_rows;i++) { 2935 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2936 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2937 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2938 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2939 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2940 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2941 } 2942 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2943 } 2944 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2945 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2946 if (isdense) { 2947 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2948 } 2949 PetscFunctionReturn(0); 2950 } 2951 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "MatISGetSubassemblingPattern" 2954 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2955 { 2956 Mat subdomain_adj; 2957 IS new_ranks,ranks_send_to; 2958 MatPartitioning partitioner; 2959 Mat_IS *matis; 2960 PetscInt n_neighs,*neighs,*n_shared,**shared; 2961 PetscInt prank; 2962 PetscMPIInt size,rank,color; 2963 PetscInt *xadj,*adjncy,*oldranks; 2964 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2965 PetscInt i,j,local_size,threshold=0; 2966 PetscErrorCode ierr; 2967 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2968 PetscSubcomm subcomm; 2969 2970 PetscFunctionBegin; 2971 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2972 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2973 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2974 2975 /* Get info on mapping */ 2976 matis = (Mat_IS*)(mat->data); 2977 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2978 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2979 2980 /* build local CSR graph of subdomains' connectivity */ 2981 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2982 xadj[0] = 0; 2983 xadj[1] = PetscMax(n_neighs-1,0); 2984 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2985 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2986 2987 if (threshold) { 2988 PetscInt* count,min_threshold; 2989 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2990 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2991 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2992 for (j=0;j<n_shared[i];j++) { 2993 count[shared[i][j]] += 1; 2994 } 2995 } 2996 /* adapt threshold since we dont want to lose significant connections */ 2997 min_threshold = n_neighs; 2998 for (i=1;i<n_neighs;i++) { 2999 for (j=0;j<n_shared[i];j++) { 3000 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 3001 } 3002 } 3003 threshold = PetscMax(min_threshold+1,threshold); 3004 xadj[1] = 0; 3005 for (i=1;i<n_neighs;i++) { 3006 for (j=0;j<n_shared[i];j++) { 3007 if (count[shared[i][j]] < threshold) { 3008 adjncy[xadj[1]] = neighs[i]; 3009 adjncy_wgt[xadj[1]] = n_shared[i]; 3010 xadj[1]++; 3011 break; 3012 } 3013 } 3014 } 3015 ierr = PetscFree(count);CHKERRQ(ierr); 3016 } else { 3017 if (xadj[1]) { 3018 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3019 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3020 } 3021 } 3022 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3023 if (use_square) { 3024 for (i=0;i<xadj[1];i++) { 3025 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3026 } 3027 } 3028 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3029 3030 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 3031 3032 /* 3033 Restrict work on active processes only. 3034 */ 3035 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3036 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3037 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3038 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3039 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3040 if (color) { 3041 ierr = PetscFree(xadj);CHKERRQ(ierr); 3042 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3043 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3044 } else { 3045 PetscInt coarsening_ratio; 3046 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3047 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3048 prank = rank; 3049 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3050 /* 3051 for (i=0;i<size;i++) { 3052 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3053 } 3054 */ 3055 for (i=0;i<xadj[1];i++) { 3056 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3057 } 3058 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3059 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3060 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3061 3062 /* Partition */ 3063 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3064 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3065 if (use_vwgt) { 3066 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 3067 v_wgt[0] = local_size; 3068 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3069 } 3070 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3071 coarsening_ratio = size/n_subdomains; 3072 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 3073 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3074 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3075 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3076 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3077 3078 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3079 if (contiguous) { 3080 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3081 } else { 3082 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3083 } 3084 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3085 /* clean up */ 3086 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3087 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3088 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3089 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3090 } 3091 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3092 3093 /* assemble parallel IS for sends */ 3094 i = 1; 3095 if (color) i=0; 3096 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3097 3098 /* get back IS */ 3099 *is_sends = ranks_send_to; 3100 PetscFunctionReturn(0); 3101 } 3102 3103 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3104 3105 #undef __FUNCT__ 3106 #define __FUNCT__ "MatISSubassemble" 3107 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3108 { 3109 Mat local_mat; 3110 Mat_IS *matis; 3111 IS is_sends_internal; 3112 PetscInt rows,cols; 3113 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3114 PetscBool ismatis,isdense,destroy_mat; 3115 ISLocalToGlobalMapping l2gmap; 3116 PetscInt* l2gmap_indices; 3117 const PetscInt* is_indices; 3118 MatType new_local_type; 3119 /* buffers */ 3120 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3121 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3122 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3123 /* MPI */ 3124 MPI_Comm comm,comm_n; 3125 PetscSubcomm subcomm; 3126 PetscMPIInt n_sends,n_recvs,commsize; 3127 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3128 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3129 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3130 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3131 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3132 PetscErrorCode ierr; 3133 3134 PetscFunctionBegin; 3135 /* TODO: add missing checks */ 3136 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3137 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3138 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3139 PetscValidLogicalCollectiveInt(mat,nis,7); 3140 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3141 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3142 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3143 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3144 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3145 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3146 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3147 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3148 PetscInt mrows,mcols,mnrows,mncols; 3149 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3150 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3151 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3152 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3153 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3154 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3155 } 3156 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3157 PetscValidLogicalCollectiveInt(mat,bs,0); 3158 /* prepare IS for sending if not provided */ 3159 if (!is_sends) { 3160 PetscBool pcontig = PETSC_TRUE; 3161 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3162 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3163 } else { 3164 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3165 is_sends_internal = is_sends; 3166 } 3167 3168 /* get pointer of MATIS data */ 3169 matis = (Mat_IS*)mat->data; 3170 3171 /* get comm */ 3172 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3173 3174 /* compute number of sends */ 3175 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3176 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3177 3178 /* compute number of receives */ 3179 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3180 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3181 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3182 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3183 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3184 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3185 ierr = PetscFree(iflags);CHKERRQ(ierr); 3186 3187 /* restrict comm if requested */ 3188 subcomm = 0; 3189 destroy_mat = PETSC_FALSE; 3190 if (restrict_comm) { 3191 PetscMPIInt color,rank,subcommsize; 3192 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3193 color = 0; 3194 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3195 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3196 subcommsize = commsize - subcommsize; 3197 /* check if reuse has been requested */ 3198 if (reuse == MAT_REUSE_MATRIX) { 3199 if (*mat_n) { 3200 PetscMPIInt subcommsize2; 3201 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3202 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3203 comm_n = PetscObjectComm((PetscObject)*mat_n); 3204 } else { 3205 comm_n = PETSC_COMM_SELF; 3206 } 3207 } else { /* MAT_INITIAL_MATRIX */ 3208 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3209 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3210 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3211 comm_n = subcomm->comm; 3212 } 3213 /* flag to destroy *mat_n if not significative */ 3214 if (color) destroy_mat = PETSC_TRUE; 3215 } else { 3216 comm_n = comm; 3217 } 3218 3219 /* prepare send/receive buffers */ 3220 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3221 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3222 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3223 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3224 if (nis) { 3225 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3226 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3227 } 3228 3229 /* Get data from local matrices */ 3230 if (!isdense) { 3231 /* TODO: See below some guidelines on how to prepare the local buffers */ 3232 /* 3233 send_buffer_vals should contain the raw values of the local matrix 3234 send_buffer_idxs should contain: 3235 - MatType_PRIVATE type 3236 - PetscInt size_of_l2gmap 3237 - PetscInt global_row_indices[size_of_l2gmap] 3238 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3239 */ 3240 } else { 3241 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3242 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3243 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 3244 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3245 send_buffer_idxs[1] = i; 3246 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3247 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3248 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3249 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3250 for (i=0;i<n_sends;i++) { 3251 ilengths_vals[is_indices[i]] = len*len; 3252 ilengths_idxs[is_indices[i]] = len+2; 3253 } 3254 } 3255 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3256 /* additional is (if any) */ 3257 if (nis) { 3258 PetscMPIInt psum; 3259 PetscInt j; 3260 for (j=0,psum=0;j<nis;j++) { 3261 PetscInt plen; 3262 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3263 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3264 psum += len+1; /* indices + lenght */ 3265 } 3266 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3267 for (j=0,psum=0;j<nis;j++) { 3268 PetscInt plen; 3269 const PetscInt *is_array_idxs; 3270 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3271 send_buffer_idxs_is[psum] = plen; 3272 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3273 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3274 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3275 psum += plen+1; /* indices + lenght */ 3276 } 3277 for (i=0;i<n_sends;i++) { 3278 ilengths_idxs_is[is_indices[i]] = psum; 3279 } 3280 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3281 } 3282 3283 buf_size_idxs = 0; 3284 buf_size_vals = 0; 3285 buf_size_idxs_is = 0; 3286 for (i=0;i<n_recvs;i++) { 3287 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3288 buf_size_vals += (PetscInt)olengths_vals[i]; 3289 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3290 } 3291 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3292 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3293 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3294 3295 /* get new tags for clean communications */ 3296 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3297 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3298 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3299 3300 /* allocate for requests */ 3301 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3302 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3303 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3304 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3305 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3306 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3307 3308 /* communications */ 3309 ptr_idxs = recv_buffer_idxs; 3310 ptr_vals = recv_buffer_vals; 3311 ptr_idxs_is = recv_buffer_idxs_is; 3312 for (i=0;i<n_recvs;i++) { 3313 source_dest = onodes[i]; 3314 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3315 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3316 ptr_idxs += olengths_idxs[i]; 3317 ptr_vals += olengths_vals[i]; 3318 if (nis) { 3319 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3320 ptr_idxs_is += olengths_idxs_is[i]; 3321 } 3322 } 3323 for (i=0;i<n_sends;i++) { 3324 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3325 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3326 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3327 if (nis) { 3328 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3329 } 3330 } 3331 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3332 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3333 3334 /* assemble new l2g map */ 3335 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3336 ptr_idxs = recv_buffer_idxs; 3337 buf_size_idxs = 0; 3338 for (i=0;i<n_recvs;i++) { 3339 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3340 ptr_idxs += olengths_idxs[i]; 3341 } 3342 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 3343 ptr_idxs = recv_buffer_idxs; 3344 buf_size_idxs = 0; 3345 for (i=0;i<n_recvs;i++) { 3346 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3347 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3348 ptr_idxs += olengths_idxs[i]; 3349 } 3350 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3351 ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3352 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3353 3354 /* infer new local matrix type from received local matrices type */ 3355 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3356 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3357 if (n_recvs) { 3358 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3359 ptr_idxs = recv_buffer_idxs; 3360 for (i=0;i<n_recvs;i++) { 3361 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3362 new_local_type_private = MATAIJ_PRIVATE; 3363 break; 3364 } 3365 ptr_idxs += olengths_idxs[i]; 3366 } 3367 switch (new_local_type_private) { 3368 case MATDENSE_PRIVATE: 3369 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3370 new_local_type = MATSEQAIJ; 3371 bs = 1; 3372 } else { /* if I receive only 1 dense matrix */ 3373 new_local_type = MATSEQDENSE; 3374 bs = 1; 3375 } 3376 break; 3377 case MATAIJ_PRIVATE: 3378 new_local_type = MATSEQAIJ; 3379 bs = 1; 3380 break; 3381 case MATBAIJ_PRIVATE: 3382 new_local_type = MATSEQBAIJ; 3383 break; 3384 case MATSBAIJ_PRIVATE: 3385 new_local_type = MATSEQSBAIJ; 3386 break; 3387 default: 3388 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3389 break; 3390 } 3391 } else { /* by default, new_local_type is seqdense */ 3392 new_local_type = MATSEQDENSE; 3393 bs = 1; 3394 } 3395 3396 /* create MATIS object if needed */ 3397 if (reuse == MAT_INITIAL_MATRIX) { 3398 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3399 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3400 } else { 3401 /* it also destroys the local matrices */ 3402 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3403 } 3404 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3405 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3406 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3407 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3408 3409 /* set values */ 3410 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3411 ptr_vals = recv_buffer_vals; 3412 ptr_idxs = recv_buffer_idxs; 3413 for (i=0;i<n_recvs;i++) { 3414 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3415 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3416 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3417 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3418 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3419 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3420 } else { 3421 /* TODO */ 3422 } 3423 ptr_idxs += olengths_idxs[i]; 3424 ptr_vals += olengths_vals[i]; 3425 } 3426 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3427 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3428 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3429 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3430 3431 #if 0 3432 if (!restrict_comm) { /* check */ 3433 Vec lvec,rvec; 3434 PetscReal infty_error; 3435 3436 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3437 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3438 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3439 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3440 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3441 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3442 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3443 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3444 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3445 } 3446 #endif 3447 3448 /* assemble new additional is (if any) */ 3449 if (nis) { 3450 PetscInt **temp_idxs,*count_is,j,psum; 3451 3452 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3453 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3454 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3455 ptr_idxs = recv_buffer_idxs_is; 3456 psum = 0; 3457 for (i=0;i<n_recvs;i++) { 3458 for (j=0;j<nis;j++) { 3459 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3460 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3461 psum += plen; 3462 ptr_idxs += plen+1; /* shift pointer to received data */ 3463 } 3464 } 3465 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3466 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3467 for (i=1;i<nis;i++) { 3468 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3469 } 3470 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3471 ptr_idxs = recv_buffer_idxs_is; 3472 for (i=0;i<n_recvs;i++) { 3473 for (j=0;j<nis;j++) { 3474 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3475 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3476 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3477 ptr_idxs += plen+1; /* shift pointer to received data */ 3478 } 3479 } 3480 for (i=0;i<nis;i++) { 3481 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3482 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3483 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3484 } 3485 ierr = PetscFree(count_is);CHKERRQ(ierr); 3486 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3487 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3488 } 3489 /* free workspace */ 3490 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3491 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3492 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3493 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3494 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3495 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3496 if (isdense) { 3497 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3498 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3499 } else { 3500 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3501 } 3502 if (nis) { 3503 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3504 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3505 } 3506 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3507 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3508 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3509 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3510 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3511 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3512 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3513 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3514 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3515 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3516 ierr = PetscFree(onodes);CHKERRQ(ierr); 3517 if (nis) { 3518 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3519 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3520 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3521 } 3522 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3523 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3524 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3525 for (i=0;i<nis;i++) { 3526 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3527 } 3528 } 3529 PetscFunctionReturn(0); 3530 } 3531 3532 /* temporary hack into ksp private data structure */ 3533 #include <petsc-private/kspimpl.h> 3534 3535 #undef __FUNCT__ 3536 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3537 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3538 { 3539 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3540 PC_IS *pcis = (PC_IS*)pc->data; 3541 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3542 MatNullSpace CoarseNullSpace=NULL; 3543 ISLocalToGlobalMapping coarse_islg; 3544 IS coarse_is,*isarray; 3545 PetscInt i,im_active=-1,active_procs=-1; 3546 PetscInt nis,nisdofs,nisneu; 3547 PC pc_temp; 3548 PCType coarse_pc_type; 3549 KSPType coarse_ksp_type; 3550 PetscBool multilevel_requested,multilevel_allowed; 3551 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3552 Mat t_coarse_mat_is; 3553 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3554 PetscMPIInt all_procs; 3555 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3556 PetscBool compute_vecs = PETSC_FALSE; 3557 PetscErrorCode ierr; 3558 3559 PetscFunctionBegin; 3560 /* Assign global numbering to coarse dofs */ 3561 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3562 compute_vecs = PETSC_TRUE; 3563 PetscInt ocoarse_size; 3564 ocoarse_size = pcbddc->coarse_size; 3565 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3566 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3567 /* see if we can avoid some work */ 3568 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3569 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3570 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3571 coarse_reuse = PETSC_FALSE; 3572 } else { /* we can safely reuse already computed coarse matrix */ 3573 coarse_reuse = PETSC_TRUE; 3574 } 3575 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3576 coarse_reuse = PETSC_FALSE; 3577 } 3578 /* reset any subassembling information */ 3579 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3580 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3581 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3582 coarse_reuse = PETSC_TRUE; 3583 } 3584 3585 /* count "active" (i.e. with positive local size) and "void" processes */ 3586 im_active = !!(pcis->n); 3587 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3588 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3589 void_procs = all_procs-active_procs; 3590 csin_type_simple = PETSC_TRUE; 3591 if (pcbddc->current_level) { 3592 csin_ml = PETSC_TRUE; 3593 ncoarse_ml = void_procs; 3594 csin_ds = PETSC_TRUE; 3595 ncoarse_ds = void_procs; 3596 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3597 } else { 3598 csin_ml = PETSC_FALSE; 3599 ncoarse_ml = all_procs; 3600 if (void_procs) { 3601 csin_ds = PETSC_TRUE; 3602 ncoarse_ds = void_procs; 3603 csin_type_simple = PETSC_FALSE; 3604 } else { 3605 csin_ds = PETSC_FALSE; 3606 ncoarse_ds = all_procs; 3607 } 3608 } 3609 3610 /* 3611 test if we can go multilevel: three conditions must be satisfied: 3612 - we have not exceeded the number of levels requested 3613 - we can actually subassemble the active processes 3614 - we can find a suitable number of MPI processes where we can place the subassembled problem 3615 */ 3616 multilevel_allowed = PETSC_FALSE; 3617 multilevel_requested = PETSC_FALSE; 3618 if (pcbddc->current_level < pcbddc->max_levels) { 3619 multilevel_requested = PETSC_TRUE; 3620 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3621 multilevel_allowed = PETSC_FALSE; 3622 } else { 3623 multilevel_allowed = PETSC_TRUE; 3624 } 3625 } 3626 /* determine number of process partecipating to coarse solver */ 3627 if (multilevel_allowed) { 3628 ncoarse = ncoarse_ml; 3629 csin = csin_ml; 3630 } else { 3631 ncoarse = ncoarse_ds; 3632 csin = csin_ds; 3633 } 3634 3635 /* creates temporary l2gmap and IS for coarse indexes */ 3636 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3637 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3638 3639 /* creates temporary MATIS object for coarse matrix */ 3640 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3641 #if 0 3642 { 3643 PetscViewer viewer; 3644 char filename[256]; 3645 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3646 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3647 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3648 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3649 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3650 } 3651 #endif 3652 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3653 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3654 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3655 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3656 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3657 3658 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3659 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3660 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3661 const PetscInt *idxs; 3662 ISLocalToGlobalMapping tmap; 3663 3664 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3665 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3666 /* allocate space for temporary storage */ 3667 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3668 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3669 /* allocate for IS array */ 3670 nisdofs = pcbddc->n_ISForDofsLocal; 3671 nisneu = !!pcbddc->NeumannBoundariesLocal; 3672 nis = nisdofs + nisneu; 3673 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3674 /* dofs splitting */ 3675 for (i=0;i<nisdofs;i++) { 3676 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3677 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3678 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3679 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3680 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3681 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3682 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3683 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3684 } 3685 /* neumann boundaries */ 3686 if (pcbddc->NeumannBoundariesLocal) { 3687 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3688 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3689 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3690 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3691 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3692 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3693 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3694 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3695 } 3696 /* free memory */ 3697 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3698 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3699 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3700 } else { 3701 nis = 0; 3702 nisdofs = 0; 3703 nisneu = 0; 3704 isarray = NULL; 3705 } 3706 /* destroy no longer needed map */ 3707 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3708 3709 /* restrict on coarse candidates (if needed) */ 3710 coarse_mat_is = NULL; 3711 if (csin) { 3712 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3713 PetscInt j,tissize,*nisindices; 3714 PetscInt *coarse_candidates; 3715 const PetscInt* tisindices; 3716 /* get coarse candidates' ranks in pc communicator */ 3717 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3718 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3719 for (i=0,j=0;i<all_procs;i++) { 3720 if (!coarse_candidates[i]) { 3721 coarse_candidates[j]=i; 3722 j++; 3723 } 3724 } 3725 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3726 /* get a suitable subassembling pattern */ 3727 if (csin_type_simple) { 3728 PetscMPIInt rank; 3729 PetscInt issize,isidx; 3730 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3731 if (im_active) { 3732 issize = 1; 3733 isidx = (PetscInt)rank; 3734 } else { 3735 issize = 0; 3736 isidx = -1; 3737 } 3738 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3739 } else { 3740 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3741 } 3742 if (pcbddc->dbg_flag) { 3743 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3744 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3745 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3746 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3747 for (i=0;i<j;i++) { 3748 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3749 } 3750 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3751 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3752 } 3753 /* shift the pattern on coarse candidates */ 3754 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3755 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3756 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3757 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3758 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3759 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3760 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3761 } 3762 if (pcbddc->dbg_flag) { 3763 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3764 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3765 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3766 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3767 } 3768 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3769 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3770 } else { 3771 if (pcbddc->dbg_flag) { 3772 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3773 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3774 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3775 } 3776 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3777 coarse_mat_is = t_coarse_mat_is; 3778 } 3779 3780 /* create local to global scatters for coarse problem */ 3781 if (compute_vecs) { 3782 PetscInt lrows; 3783 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3784 if (coarse_mat_is) { 3785 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3786 } else { 3787 lrows = 0; 3788 } 3789 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3790 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3791 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3792 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3793 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3794 } 3795 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3796 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3797 3798 /* set defaults for coarse KSP and PC */ 3799 if (multilevel_allowed) { 3800 coarse_ksp_type = KSPRICHARDSON; 3801 coarse_pc_type = PCBDDC; 3802 } else { 3803 coarse_ksp_type = KSPPREONLY; 3804 coarse_pc_type = PCREDUNDANT; 3805 } 3806 3807 /* print some info if requested */ 3808 if (pcbddc->dbg_flag) { 3809 if (!multilevel_allowed) { 3810 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3811 if (multilevel_requested) { 3812 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3813 } else if (pcbddc->max_levels) { 3814 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3815 } 3816 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3817 } 3818 } 3819 3820 /* create the coarse KSP object only once with defaults */ 3821 if (coarse_mat_is) { 3822 MatReuse coarse_mat_reuse; 3823 PetscViewer dbg_viewer; 3824 if (pcbddc->dbg_flag) { 3825 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3826 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3827 } 3828 if (!pcbddc->coarse_ksp) { 3829 char prefix[256],str_level[16]; 3830 size_t len; 3831 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3832 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3833 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3834 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3835 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3836 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3837 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3838 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3839 /* prefix */ 3840 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3841 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3842 if (!pcbddc->current_level) { 3843 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3844 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3845 } else { 3846 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3847 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3848 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3849 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3850 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3851 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3852 } 3853 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3854 /* allow user customization */ 3855 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3856 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3857 } 3858 3859 /* get some info after set from options */ 3860 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3861 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3862 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3863 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3864 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3865 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3866 isbddc = PETSC_FALSE; 3867 } 3868 if (isredundant) { 3869 KSP inner_ksp; 3870 PC inner_pc; 3871 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3872 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3873 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3874 } 3875 3876 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3877 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3878 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3879 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3880 if (nisdofs) { 3881 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3882 for (i=0;i<nisdofs;i++) { 3883 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3884 } 3885 } 3886 if (nisneu) { 3887 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3888 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3889 } 3890 3891 /* assemble coarse matrix */ 3892 if (coarse_reuse) { 3893 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3894 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3895 coarse_mat_reuse = MAT_REUSE_MATRIX; 3896 } else { 3897 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3898 } 3899 if (isbddc || isnn) { 3900 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3901 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3902 if (pcbddc->dbg_flag) { 3903 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3904 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3905 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3906 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3907 } 3908 } 3909 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3910 } else { 3911 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3912 } 3913 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3914 3915 /* propagate symmetry info to coarse matrix */ 3916 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3917 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3918 3919 /* set operators */ 3920 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3921 if (pcbddc->dbg_flag) { 3922 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3923 } 3924 } else { /* processes non partecipating to coarse solver (if any) */ 3925 coarse_mat = 0; 3926 } 3927 ierr = PetscFree(isarray);CHKERRQ(ierr); 3928 #if 0 3929 { 3930 PetscViewer viewer; 3931 char filename[256]; 3932 sprintf(filename,"coarse_mat.m"); 3933 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3934 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3935 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3936 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3937 } 3938 #endif 3939 3940 /* Compute coarse null space (special handling by BDDC only) */ 3941 if (pcbddc->NullSpace) { 3942 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3943 } 3944 3945 if (pcbddc->coarse_ksp) { 3946 Vec crhs,csol; 3947 PetscBool ispreonly; 3948 if (CoarseNullSpace) { 3949 if (isbddc) { 3950 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3951 } else { 3952 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3953 } 3954 } 3955 /* setup coarse ksp */ 3956 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3957 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3958 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3959 /* hack */ 3960 if (!csol) { 3961 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3962 } 3963 if (!crhs) { 3964 ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3965 } 3966 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3967 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3968 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3969 KSP check_ksp; 3970 KSPType check_ksp_type; 3971 PC check_pc; 3972 Vec check_vec,coarse_vec; 3973 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3974 PetscInt its; 3975 PetscBool compute_eigs; 3976 PetscReal *eigs_r,*eigs_c; 3977 PetscInt neigs; 3978 const char *prefix; 3979 3980 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3981 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3982 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3983 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3984 if (ispreonly) { 3985 check_ksp_type = KSPPREONLY; 3986 compute_eigs = PETSC_FALSE; 3987 } else { 3988 check_ksp_type = KSPGMRES; 3989 compute_eigs = PETSC_TRUE; 3990 } 3991 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3992 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3993 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3994 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3995 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3996 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3997 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3998 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3999 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4000 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4001 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4002 /* create random vec */ 4003 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4004 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4005 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4006 if (CoarseNullSpace) { 4007 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4008 } 4009 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4010 /* solve coarse problem */ 4011 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4012 if (CoarseNullSpace) { 4013 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4014 } 4015 /* set eigenvalue estimation if preonly has not been requested */ 4016 if (compute_eigs) { 4017 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 4018 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 4019 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4020 lambda_max = eigs_r[neigs-1]; 4021 lambda_min = eigs_r[0]; 4022 if (pcbddc->use_coarse_estimates) { 4023 if (lambda_max>lambda_min) { 4024 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4025 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4026 } 4027 } 4028 } 4029 4030 /* check coarse problem residual error */ 4031 if (pcbddc->dbg_flag) { 4032 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4033 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4034 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4035 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4036 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4037 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4038 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4039 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4040 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4041 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4042 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4043 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4044 if (compute_eigs) { 4045 PetscReal lambda_max_s,lambda_min_s; 4046 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4047 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4048 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4049 for (i=0;i<neigs;i++) { 4050 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4051 } 4052 } 4053 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4054 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4055 } 4056 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4057 if (compute_eigs) { 4058 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4059 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4060 } 4061 } 4062 } 4063 /* print additional info */ 4064 if (pcbddc->dbg_flag) { 4065 /* waits until all processes reaches this point */ 4066 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4068 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4069 } 4070 4071 /* free memory */ 4072 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4073 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4074 PetscFunctionReturn(0); 4075 } 4076 4077 #undef __FUNCT__ 4078 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4079 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4080 { 4081 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4082 PC_IS* pcis = (PC_IS*)pc->data; 4083 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4084 PetscInt i,coarse_size; 4085 PetscInt *local_primal_indices; 4086 PetscErrorCode ierr; 4087 4088 PetscFunctionBegin; 4089 /* Compute global number of coarse dofs */ 4090 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4091 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4092 } 4093 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4094 4095 /* check numbering */ 4096 if (pcbddc->dbg_flag) { 4097 PetscScalar coarsesum,*array; 4098 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4099 4100 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4101 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4102 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4103 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4104 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4105 for (i=0;i<pcbddc->local_primal_size;i++) { 4106 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4107 } 4108 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4109 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4110 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4111 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4112 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4113 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4114 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4115 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4116 for (i=0;i<pcis->n;i++) { 4117 if (array[i] == 1.0) { 4118 set_error = PETSC_TRUE; 4119 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4120 } 4121 } 4122 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4123 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4124 for (i=0;i<pcis->n;i++) { 4125 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4126 } 4127 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4128 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4129 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4130 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4131 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4132 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4133 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4134 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4135 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4137 for (i=0;i<pcbddc->local_primal_size;i++) { 4138 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4139 } 4140 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4141 } 4142 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4143 if (set_error_reduced) { 4144 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4145 } 4146 } 4147 /* get back data */ 4148 *coarse_size_n = coarse_size; 4149 *local_primal_indices_n = local_primal_indices; 4150 PetscFunctionReturn(0); 4151 } 4152 4153 #undef __FUNCT__ 4154 #define __FUNCT__ "PCBDDCGlobalToLocal" 4155 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4156 { 4157 IS localis_t; 4158 PetscInt i,lsize,*idxs,n; 4159 PetscScalar *vals; 4160 PetscErrorCode ierr; 4161 4162 PetscFunctionBegin; 4163 /* get indices in local ordering exploiting local to global map */ 4164 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4165 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 4166 for (i=0;i<lsize;i++) vals[i] = 1.0; 4167 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4168 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4169 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4170 if (idxs) { /* multilevel guard */ 4171 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4172 } 4173 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4174 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4175 ierr = PetscFree(vals);CHKERRQ(ierr); 4176 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4177 /* now compute set in local ordering */ 4178 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4179 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4180 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4181 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4182 for (i=0,lsize=0;i<n;i++) { 4183 if (PetscRealPart(vals[i]) > 0.5) { 4184 lsize++; 4185 } 4186 } 4187 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 4188 for (i=0,lsize=0;i<n;i++) { 4189 if (PetscRealPart(vals[i]) > 0.5) { 4190 idxs[lsize++] = i; 4191 } 4192 } 4193 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4194 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4195 *localis = localis_t; 4196 PetscFunctionReturn(0); 4197 } 4198 4199 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4200 #undef __FUNCT__ 4201 #define __FUNCT__ "PCBDDCMatMult_Private" 4202 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4203 { 4204 PCBDDCChange_ctx change_ctx; 4205 PetscErrorCode ierr; 4206 4207 PetscFunctionBegin; 4208 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4209 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4210 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4211 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4212 PetscFunctionReturn(0); 4213 } 4214 4215 #undef __FUNCT__ 4216 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4217 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4218 { 4219 PCBDDCChange_ctx change_ctx; 4220 PetscErrorCode ierr; 4221 4222 PetscFunctionBegin; 4223 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4224 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4225 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4226 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4227 PetscFunctionReturn(0); 4228 } 4229