1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 111 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 112 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 118 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 119 { 120 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 121 PC_IS *pcis = (PC_IS*)pc->data; 122 VecType impVecType; 123 PetscInt n_constraints,n_R,old_size; 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 if (!pcbddc->ConstraintMatrix) { 128 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 129 } 130 /* get sizes */ 131 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 132 n_R = pcis->n-pcbddc->n_actual_vertices; 133 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 134 /* local work vectors (try to avoid unneeded work)*/ 135 /* R nodes */ 136 old_size = -1; 137 if (pcbddc->vec1_R) { 138 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 139 } 140 if (n_R != old_size) { 141 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 143 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 144 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 145 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 146 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 147 } 148 /* local primal dofs */ 149 old_size = -1; 150 if (pcbddc->vec1_P) { 151 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 152 } 153 if (pcbddc->local_primal_size != old_size) { 154 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 157 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 158 } 159 /* local explicit constraints */ 160 old_size = -1; 161 if (pcbddc->vec1_C) { 162 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 163 } 164 if (n_constraints && n_constraints != old_size) { 165 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 168 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "PCBDDCSetUpCorrection" 175 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 176 { 177 PetscErrorCode ierr; 178 /* pointers to pcis and pcbddc */ 179 PC_IS* pcis = (PC_IS*)pc->data; 180 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 181 /* submatrices of local problem */ 182 Mat A_RV,A_VR,A_VV; 183 /* working matrices */ 184 Mat M1,M2,M3,C_CR; 185 /* working vectors */ 186 Vec vec1_C,vec2_C,vec1_V,vec2_V; 187 /* additional working stuff */ 188 IS is_aux; 189 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 190 const PetscScalar *array,*row_cmat_values; 191 const PetscInt *row_cmat_indices,*idx_R_local; 192 PetscInt *idx_V_B,*auxindices; 193 PetscInt n_vertices,n_constraints,size_of_constraint; 194 PetscInt i,j,n_R,n_D,n_B; 195 PetscBool unsymmetric_check; 196 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 197 MatType impMatType; 198 /* some shortcuts to scalars */ 199 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 200 /* for debugging purposes */ 201 PetscReal *coarsefunctions_errors,*constraints_errors; 202 203 PetscFunctionBegin; 204 /* get number of vertices (corners plus constraints with change of basis) 205 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 206 n_vertices = pcbddc->n_actual_vertices; 207 n_constraints = pcbddc->local_primal_size-n_vertices; 208 /* Set Non-overlapping dimensions */ 209 n_B = pcis->n_B; n_D = pcis->n - n_B; 210 n_R = pcis->n-n_vertices; 211 212 /* Set types for local objects needed by BDDC precondtioner */ 213 impMatType = MATSEQDENSE; 214 215 /* Allocating some extra storage just to be safe */ 216 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 217 for (i=0;i<pcis->n;i++) auxindices[i]=i; 218 219 /* vertices in boundary numbering */ 220 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 221 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 222 if (i != n_vertices) { 223 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 224 } 225 226 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 227 if (n_constraints) { 228 /* see if we can save some allocations */ 229 if (pcbddc->local_auxmat2) { 230 PetscInt on_R,on_constraints; 231 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 232 if (on_R != n_R || on_constraints != n_constraints) { 233 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 234 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 235 } 236 } 237 /* work vectors */ 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 240 /* auxiliary matrices */ 241 if (!pcbddc->local_auxmat2) { 242 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 243 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 244 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 245 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 246 } 247 248 /* Extract constraints on R nodes: C_{CR} */ 249 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 250 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 251 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 252 253 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 254 for (i=0;i<n_constraints;i++) { 255 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 256 /* Get row of constraint matrix in R numbering */ 257 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 259 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 260 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 261 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 262 /* Solve for row of constraint matrix in R numbering */ 263 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 264 /* Set values in local_auxmat2 */ 265 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 267 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 268 } 269 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 272 273 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 274 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 275 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 276 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 277 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 278 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 279 ierr = MatSetUp(M1);CHKERRQ(ierr); 280 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 281 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 282 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 283 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 284 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 285 ierr = MatDestroy(&M2);CHKERRQ(ierr); 286 ierr = MatDestroy(&M3);CHKERRQ(ierr); 287 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 288 if (!pcbddc->local_auxmat1) { 289 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } else { 291 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 292 } 293 } 294 295 /* Get submatrices from subdomain matrix */ 296 if (n_vertices) { 297 PetscInt ibs,mbs; 298 PetscBool issbaij; 299 Mat newmat; 300 301 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 302 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 303 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 304 if (ibs != mbs) { /* need to convert to SEQAIJ */ 305 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 309 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 310 } else { 311 /* this is safe */ 312 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 313 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 314 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 315 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 316 /* which of the two approaches is faster? */ 317 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 319 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 320 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 321 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 322 } else { 323 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 324 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 325 } 326 } 327 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 328 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 329 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 330 } 331 332 /* Matrix of coarse basis functions (local) */ 333 if (pcbddc->coarse_phi_B) { 334 PetscInt on_B,on_primal; 335 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 336 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 337 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 338 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 339 } 340 } 341 if (pcbddc->coarse_phi_D) { 342 PetscInt on_D,on_primal; 343 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 344 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 345 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 346 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 347 } 348 } 349 if (!pcbddc->coarse_phi_B) { 350 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 351 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 352 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 353 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 } 355 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 356 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 357 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 358 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 359 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 } 361 362 if (pcbddc->dbg_flag) { 363 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 364 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 365 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 366 } 367 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 368 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 369 370 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 371 372 /* vertices */ 373 for (i=0;i<n_vertices;i++) { 374 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 375 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 376 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 377 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 378 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 380 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 381 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 384 if (n_constraints) { 385 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 386 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 387 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 388 } 389 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 390 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 391 392 /* Set values in coarse basis function and subdomain part of coarse_mat */ 393 /* coarse basis functions */ 394 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 395 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 399 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 400 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 401 if (pcbddc->switch_static || pcbddc->dbg_flag) { 402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 } 408 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 409 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 412 if (n_constraints) { 413 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 416 } 417 418 /* check */ 419 if (pcbddc->dbg_flag) { 420 /* assemble subdomain vector on local nodes */ 421 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 422 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 425 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 427 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 428 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 429 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 430 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 433 if (n_constraints) { 434 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 437 } 438 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 441 /* check saddle point solution */ 442 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 444 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 445 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 446 /* shift by the identity matrix */ 447 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 448 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 451 } 452 } 453 454 /* constraints */ 455 for (i=0;i<n_constraints;i++) { 456 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 457 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 458 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 459 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 460 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 461 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 462 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 463 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 464 if (n_vertices) { 465 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 466 } 467 /* Set values in coarse basis function and subdomain part of coarse_mat */ 468 /* coarse basis functions */ 469 j = i+n_vertices; /* don't touch this! */ 470 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 471 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 475 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 476 if (pcbddc->switch_static || pcbddc->dbg_flag) { 477 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 481 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 482 } 483 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 484 if (n_vertices) { 485 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 488 } 489 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 492 493 if (pcbddc->dbg_flag) { 494 /* assemble subdomain vector on nodes */ 495 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 496 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 498 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 499 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 500 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 501 /* assemble subdomain vector of lagrange multipliers */ 502 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 503 if (n_vertices) { 504 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 507 } 508 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 511 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 513 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 514 /* check saddle point solution */ 515 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 517 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 518 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 519 /* shift by the identity matrix */ 520 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 521 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 523 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 524 } 525 } 526 /* call assembling routines for local coarse basis */ 527 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 if (pcbddc->switch_static || pcbddc->dbg_flag) { 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 } 533 534 /* compute other basis functions for non-symmetric problems */ 535 ierr = MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);CHKERRQ(ierr); 536 if (!pcbddc->issym) { 537 if (!pcbddc->coarse_psi_B) { 538 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 539 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 541 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 542 } 543 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 544 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 545 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 546 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 547 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 548 } 549 for (i=0;i<pcbddc->local_primal_size;i++) { 550 if (n_constraints) { 551 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 552 for (j=0;j<n_constraints;j++) { 553 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 554 } 555 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 556 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 557 } 558 if (i<n_vertices) { 559 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 560 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 561 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 562 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 563 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 564 if (n_constraints) { 565 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 566 } 567 } else { 568 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 569 } 570 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 571 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 572 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 574 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 576 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 577 if (i<n_vertices) { 578 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 579 } 580 if (pcbddc->switch_static || pcbddc->dbg_flag) { 581 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 583 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 585 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 586 } 587 588 if (pcbddc->dbg_flag) { 589 /* assemble subdomain vector on nodes */ 590 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 591 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 593 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 594 if (i<n_vertices) { 595 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 596 } 597 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 598 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 599 /* assemble subdomain vector of lagrange multipliers */ 600 for (j=0;j<pcbddc->local_primal_size;j++) { 601 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 602 } 603 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 604 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 605 /* check saddle point solution */ 606 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 608 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 609 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 610 /* shift by the identity matrix */ 611 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 612 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 614 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 615 } 616 } 617 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 if (pcbddc->switch_static || pcbddc->dbg_flag) { 620 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 } 623 unsymmetric_check = PETSC_TRUE; 624 } else { /* take references to already computed coarse basis */ 625 unsymmetric_check = PETSC_FALSE; 626 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 627 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 628 if (pcbddc->coarse_phi_D) { 629 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 630 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 631 } 632 } 633 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 634 /* Checking coarse_sub_mat and coarse basis functios */ 635 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 636 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 637 if (pcbddc->dbg_flag) { 638 Mat coarse_sub_mat; 639 Mat AUXMAT,TM1,TM2,TM3,TM4; 640 Mat coarse_phi_D,coarse_phi_B; 641 Mat coarse_psi_D,coarse_psi_B; 642 Mat A_II,A_BB,A_IB,A_BI; 643 MatType checkmattype=MATSEQAIJ; 644 PetscReal real_value; 645 646 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 648 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 649 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 650 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 651 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 652 if (unsymmetric_check) { 653 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 654 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 655 } 656 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 657 658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 660 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 661 if (unsymmetric_check) { 662 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 663 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 664 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 665 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 666 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 667 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 668 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 669 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 671 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 } else { 675 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 676 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 677 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 } 684 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 686 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 687 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 688 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 689 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 695 for (i=0;i<pcbddc->local_primal_size;i++) { 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 697 } 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 699 for (i=0;i<pcbddc->local_primal_size;i++) { 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 701 } 702 if (unsymmetric_check) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 704 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 706 } 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 708 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 710 } 711 } 712 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 715 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 716 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 719 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 720 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 721 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 722 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 723 if (unsymmetric_check) { 724 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 725 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 726 } 727 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 728 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 729 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 730 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 731 } 732 /* free memory */ 733 if (n_vertices) { 734 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 735 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 737 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 738 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 739 } 740 if (n_constraints) { 741 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 742 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 743 ierr = MatDestroy(&M1);CHKERRQ(ierr); 744 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 745 } 746 ierr = PetscFree(auxindices);CHKERRQ(ierr); 747 /* get back data */ 748 *coarse_submat_vals_n = coarse_submat_vals; 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 754 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 755 { 756 PC_IS* pcis = (PC_IS*)(pc->data); 757 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 758 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 759 PetscBool issbaij,isseqaij; 760 /* manage repeated solves */ 761 MatReuse reuse; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 766 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 767 } 768 /* get mat flags */ 769 reuse = MAT_INITIAL_MATRIX; 770 if (pc->setupcalled) { 771 if (pc->flag == SAME_NONZERO_PATTERN) { 772 reuse = MAT_REUSE_MATRIX; 773 } else { 774 reuse = MAT_INITIAL_MATRIX; 775 } 776 } 777 if (reuse == MAT_INITIAL_MATRIX) { 778 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 780 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 783 } 784 785 /* transform local matrices if needed */ 786 if (pcbddc->ChangeOfBasisMatrix) { 787 Mat change_mat_all; 788 PetscScalar *row_cmat_values; 789 PetscInt *row_cmat_indices; 790 PetscInt *nnz,*is_indices,*temp_indices; 791 PetscInt i,j,k,n_D,n_B; 792 793 /* Get Non-overlapping dimensions */ 794 n_B = pcis->n_B; 795 n_D = pcis->n-n_B; 796 797 /* compute nonzero structure of change of basis on all local nodes */ 798 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 799 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 800 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 801 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 803 k=1; 804 for (i=0;i<n_B;i++) { 805 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 806 nnz[is_indices[i]]=j; 807 if (k < j) k = j; 808 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 809 } 810 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 811 /* assemble change of basis matrix on the whole set of local dofs */ 812 ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr); 813 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 814 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 815 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 816 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 817 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 818 for (i=0;i<n_D;i++) { 819 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 822 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 823 for (i=0;i<n_B;i++) { 824 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 825 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 826 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 827 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 828 } 829 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 831 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 832 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 833 if (isseqaij) { 834 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 835 } else { 836 Mat work_mat; 837 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 838 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 839 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 840 } 841 /* 842 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 843 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 844 */ 845 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 846 ierr = PetscFree(nnz);CHKERRQ(ierr); 847 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 848 } else { 849 /* without change of basis, the local matrix is unchanged */ 850 if (!pcbddc->local_mat) { 851 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 852 pcbddc->local_mat = matis->A; 853 } 854 } 855 856 /* get submatrices */ 857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 858 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 859 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 860 if (!issbaij) { 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 862 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 863 } else { 864 Mat newmat; 865 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 866 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 867 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 868 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 869 } 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNCT__ 874 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 875 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 876 { 877 PC_IS* pcis = (PC_IS*)(pc->data); 878 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 879 IS is_aux1,is_aux2; 880 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 881 PetscInt n_vertices,i,j,n_R,n_D,n_B; 882 PetscInt vbs,bs; 883 PetscBT bitmask; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 /* 888 No need to setup local scatters if 889 - primal space is unchanged 890 AND 891 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 892 AND 893 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 894 */ 895 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 896 PetscFunctionReturn(0); 897 } 898 /* destroy old objects */ 899 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 900 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 901 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 902 /* Set Non-overlapping dimensions */ 903 n_B = pcis->n_B; n_D = pcis->n - n_B; 904 n_vertices = pcbddc->n_actual_vertices; 905 /* create auxiliary bitmask */ 906 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 907 for (i=0;i<n_vertices;i++) { 908 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 909 } 910 911 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 912 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 913 for (i=0, n_R=0; i<pcis->n; i++) { 914 if (!PetscBTLookup(bitmask,i)) { 915 idx_R_local[n_R] = i; 916 n_R++; 917 } 918 } 919 920 /* Block code */ 921 vbs = 1; 922 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 923 if (bs>1 && !(n_vertices%bs)) { 924 PetscBool is_blocked = PETSC_TRUE; 925 PetscInt *vary; 926 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 927 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 928 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 929 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 930 for (i=0; i<n_vertices; i++) { 931 if (vary[i]!=0 && vary[i]!=bs) { 932 is_blocked = PETSC_FALSE; 933 break; 934 } 935 } 936 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 937 vbs = bs; 938 for (i=0;i<n_R/vbs;i++) { 939 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 940 } 941 } 942 ierr = PetscFree(vary);CHKERRQ(ierr); 943 } 944 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 945 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 946 947 /* print some info if requested */ 948 if (pcbddc->dbg_flag) { 949 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 951 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 952 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 956 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 957 } 958 959 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 960 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 961 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 962 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 963 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 964 for (i=0; i<n_D; i++) { 965 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 966 } 967 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 968 for (i=0, j=0; i<n_R; i++) { 969 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 970 aux_array1[j++] = i; 971 } 972 } 973 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 974 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 975 for (i=0, j=0; i<n_B; i++) { 976 if (!PetscBTLookup(bitmask,is_indices[i])) { 977 aux_array2[j++] = i; 978 } 979 } 980 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 981 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 982 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 983 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 984 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 985 986 if (pcbddc->switch_static || pcbddc->dbg_flag) { 987 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 988 for (i=0, j=0; i<n_R; i++) { 989 if (PetscBTLookup(bitmask,idx_R_local[i])) { 990 aux_array1[j++] = i; 991 } 992 } 993 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 994 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 995 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 996 } 997 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 998 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1005 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1006 { 1007 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1008 PC_IS *pcis = (PC_IS*)pc->data; 1009 PC pc_temp; 1010 Mat A_RR; 1011 MatReuse reuse; 1012 PetscScalar m_one = -1.0; 1013 PetscReal value; 1014 PetscInt n_D,n_R,ibs,mbs; 1015 PetscBool use_exact,use_exact_reduced,issbaij; 1016 PetscErrorCode ierr; 1017 /* prefixes stuff */ 1018 char dir_prefix[256],neu_prefix[256],str_level[16]; 1019 size_t len; 1020 1021 PetscFunctionBegin; 1022 1023 /* compute prefixes */ 1024 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1025 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1026 if (!pcbddc->current_level) { 1027 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1028 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1029 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1030 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1031 } else { 1032 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1033 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1034 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1035 len -= 15; /* remove "pc_bddc_coarse_" */ 1036 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1037 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1038 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1039 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1040 *(dir_prefix+len)='\0'; 1041 *(neu_prefix+len)='\0'; 1042 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1043 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1044 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1045 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1046 } 1047 1048 /* DIRICHLET PROBLEM */ 1049 /* Matrix for Dirichlet problem is pcis->A_II */ 1050 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1051 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1052 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1053 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1054 /* default */ 1055 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1056 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1057 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1058 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1059 if (issbaij) { 1060 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1061 } else { 1062 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1063 } 1064 /* Allow user's customization */ 1065 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1066 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1067 } 1068 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1069 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1070 if (!n_D) { 1071 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1072 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1073 } 1074 /* Set Up KSP for Dirichlet problem of BDDC */ 1075 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1076 /* set ksp_D into pcis data */ 1077 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1078 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1079 pcis->ksp_D = pcbddc->ksp_D; 1080 1081 /* NEUMANN PROBLEM */ 1082 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1083 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1084 if (pcbddc->ksp_R) { /* already created ksp */ 1085 PetscInt nn_R; 1086 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1087 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1088 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1089 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1090 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1091 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1092 reuse = MAT_INITIAL_MATRIX; 1093 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1094 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1095 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1096 reuse = MAT_INITIAL_MATRIX; 1097 } else { /* safe to reuse the matrix */ 1098 reuse = MAT_REUSE_MATRIX; 1099 } 1100 } 1101 /* last check */ 1102 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1103 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1104 reuse = MAT_INITIAL_MATRIX; 1105 } 1106 } else { /* first time, so we need to create the matrix */ 1107 reuse = MAT_INITIAL_MATRIX; 1108 } 1109 /* extract A_RR */ 1110 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1111 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1112 if (ibs != mbs) { 1113 Mat newmat; 1114 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1115 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1116 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1117 } else { 1118 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1119 } 1120 if (!pcbddc->ksp_R) { /* create object if not present */ 1121 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1122 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1123 /* default */ 1124 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1125 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1126 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1127 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1128 if (issbaij) { 1129 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1130 } else { 1131 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1132 } 1133 /* Allow user's customization */ 1134 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1135 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1136 } 1137 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1138 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1139 if (!n_R) { 1140 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1141 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1142 } 1143 /* Set Up KSP for Neumann problem of BDDC */ 1144 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1145 1146 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1147 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1148 /* Dirichlet */ 1149 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1150 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1151 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1152 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1153 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1154 /* need to be adapted? */ 1155 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1156 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1157 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1158 /* print info */ 1159 if (pcbddc->dbg_flag) { 1160 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1161 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1162 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1163 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1164 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1165 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1166 } 1167 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1168 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1169 } 1170 1171 /* Neumann */ 1172 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1173 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1174 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1175 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1176 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1177 /* need to be adapted? */ 1178 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1179 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1180 /* print info */ 1181 if (pcbddc->dbg_flag) { 1182 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1183 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1184 } 1185 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1186 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1187 } 1188 } 1189 /* free Neumann problem's matrix */ 1190 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1196 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1197 { 1198 PetscErrorCode ierr; 1199 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1200 1201 PetscFunctionBegin; 1202 if (applytranspose) { 1203 if (pcbddc->local_auxmat1) { 1204 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1205 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1206 } 1207 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1208 } else { 1209 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1210 if (pcbddc->local_auxmat1) { 1211 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1212 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1213 } 1214 } 1215 PetscFunctionReturn(0); 1216 } 1217 1218 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1221 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1222 { 1223 PetscErrorCode ierr; 1224 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1225 PC_IS* pcis = (PC_IS*) (pc->data); 1226 const PetscScalar zero = 0.0; 1227 1228 PetscFunctionBegin; 1229 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1230 if (applytranspose) { 1231 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1232 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1233 } else { 1234 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1235 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1236 } 1237 /* start communications from local primal nodes to rhs of coarse solver */ 1238 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1239 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1241 1242 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1243 /* TODO remove null space when doing multilevel */ 1244 if (pcbddc->coarse_ksp) { 1245 if (applytranspose) { 1246 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1247 } else { 1248 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1249 } 1250 } 1251 /* start communications from coarse solver solution to local primal nodes */ 1252 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1253 1254 /* Local solution on R nodes */ 1255 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1256 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1257 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 if (pcbddc->switch_static) { 1259 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1260 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1261 } 1262 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1263 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1264 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 if (pcbddc->switch_static) { 1267 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 } 1270 1271 /* complete communications from coarse sol to local primal nodes */ 1272 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1273 1274 /* Sum contributions from two levels */ 1275 if (applytranspose) { 1276 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1277 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1278 } else { 1279 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1280 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1281 } 1282 PetscFunctionReturn(0); 1283 } 1284 1285 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1288 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1289 { 1290 PetscErrorCode ierr; 1291 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1292 PetscScalar *array,*array2; 1293 Vec from,to; 1294 1295 PetscFunctionBegin; 1296 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1297 from = pcbddc->coarse_vec; 1298 to = pcbddc->vec1_P; 1299 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1300 Vec tvec; 1301 PetscInt lsize; 1302 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1303 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1304 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1305 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1306 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1307 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1308 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1309 } 1310 } else { /* from local to global -> put data in coarse right hand side */ 1311 from = pcbddc->vec1_P; 1312 to = pcbddc->coarse_vec; 1313 } 1314 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1320 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1321 { 1322 PetscErrorCode ierr; 1323 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1324 PetscScalar *array,*array2; 1325 Vec from,to; 1326 1327 PetscFunctionBegin; 1328 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1329 from = pcbddc->coarse_vec; 1330 to = pcbddc->vec1_P; 1331 } else { /* from local to global -> put data in coarse right hand side */ 1332 from = pcbddc->vec1_P; 1333 to = pcbddc->coarse_vec; 1334 } 1335 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1336 if (smode == SCATTER_FORWARD) { 1337 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1338 Vec tvec; 1339 PetscInt lsize; 1340 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1341 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1342 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1343 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1344 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1345 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1346 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1347 } 1348 } 1349 PetscFunctionReturn(0); 1350 } 1351 1352 /* uncomment for testing purposes */ 1353 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1356 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1357 { 1358 PetscErrorCode ierr; 1359 PC_IS* pcis = (PC_IS*)(pc->data); 1360 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1361 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1362 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1363 MatType impMatType=MATSEQAIJ; 1364 /* one and zero */ 1365 PetscScalar one=1.0,zero=0.0; 1366 /* space to store constraints and their local indices */ 1367 PetscScalar *temp_quadrature_constraint; 1368 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1369 /* iterators */ 1370 PetscInt i,j,k,total_counts,temp_start_ptr; 1371 /* stuff to store connected components stored in pcbddc->mat_graph */ 1372 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1373 PetscInt n_ISForFaces,n_ISForEdges; 1374 /* near null space stuff */ 1375 MatNullSpace nearnullsp; 1376 const Vec *nearnullvecs; 1377 Vec *localnearnullsp; 1378 PetscBool nnsp_has_cnst; 1379 PetscInt nnsp_size; 1380 PetscScalar *array; 1381 /* BLAS integers */ 1382 PetscBLASInt lwork,lierr; 1383 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1384 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1385 /* LAPACK working arrays for SVD or POD */ 1386 PetscBool skip_lapack; 1387 PetscScalar *work; 1388 PetscReal *singular_vals; 1389 #if defined(PETSC_USE_COMPLEX) 1390 PetscReal *rwork; 1391 #endif 1392 #if defined(PETSC_MISSING_LAPACK_GESVD) 1393 PetscBLASInt Blas_one_2=1; 1394 PetscScalar *temp_basis,*correlation_mat; 1395 #else 1396 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1397 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1398 #endif 1399 /* reuse */ 1400 PetscInt olocal_primal_size; 1401 PetscInt *oprimal_indices_local_idxs; 1402 /* change of basis */ 1403 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1404 PetscBool boolforchange,qr_needed; 1405 PetscBT touched,change_basis,qr_needed_idx; 1406 /* auxiliary stuff */ 1407 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1408 /* some quantities */ 1409 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1410 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1411 1412 1413 PetscFunctionBegin; 1414 /* Destroy Mat objects computed previously */ 1415 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1416 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1417 /* Get index sets for faces, edges and vertices from graph */ 1418 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1419 pcbddc->use_vertices = PETSC_TRUE; 1420 } 1421 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1422 /* HACKS (the two following code branches) */ 1423 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1424 pcbddc->use_change_of_basis = PETSC_TRUE; 1425 pcbddc->use_change_on_faces = PETSC_FALSE; 1426 } 1427 if (pcbddc->NullSpace) { 1428 /* use_change_of_basis should be consistent among processors */ 1429 PetscBool tbool = pcbddc->use_change_of_basis; 1430 ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1431 } 1432 /* print some info */ 1433 if (pcbddc->dbg_flag) { 1434 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1435 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1436 i = 0; 1437 if (ISForVertices) { 1438 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1439 } 1440 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1441 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1442 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1443 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1444 } 1445 /* check if near null space is attached to global mat */ 1446 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1447 if (nearnullsp) { 1448 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1449 /* remove any stored info */ 1450 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1451 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1452 /* store information for BDDC solver reuse */ 1453 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1454 pcbddc->onearnullspace = nearnullsp; 1455 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1456 for (i=0;i<nnsp_size;i++) { 1457 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1458 } 1459 } else { /* if near null space is not provided BDDC uses constants by default */ 1460 nnsp_size = 0; 1461 nnsp_has_cnst = PETSC_TRUE; 1462 } 1463 /* get max number of constraints on a single cc */ 1464 max_constraints = nnsp_size; 1465 if (nnsp_has_cnst) max_constraints++; 1466 1467 /* 1468 Evaluate maximum storage size needed by the procedure 1469 - temp_indices will contain start index of each constraint stored as follows 1470 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1471 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1472 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1473 */ 1474 total_counts = n_ISForFaces+n_ISForEdges; 1475 total_counts *= max_constraints; 1476 n_vertices = 0; 1477 if (ISForVertices) { 1478 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1479 } 1480 total_counts += n_vertices; 1481 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1482 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1483 total_counts = 0; 1484 max_size_of_constraint = 0; 1485 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1486 if (i<n_ISForEdges) { 1487 used_IS = &ISForEdges[i]; 1488 } else { 1489 used_IS = &ISForFaces[i-n_ISForEdges]; 1490 } 1491 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1492 total_counts += j; 1493 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1494 } 1495 total_counts *= max_constraints; 1496 total_counts += n_vertices; 1497 ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr); 1498 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr); 1499 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1500 /* get local part of global near null space vectors */ 1501 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1502 for (k=0;k<nnsp_size;k++) { 1503 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1504 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 } 1507 1508 /* whether or not to skip lapack calls */ 1509 skip_lapack = PETSC_TRUE; 1510 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1511 1512 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1513 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1514 PetscScalar temp_work; 1515 #if defined(PETSC_MISSING_LAPACK_GESVD) 1516 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1517 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1518 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1519 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1520 #if defined(PETSC_USE_COMPLEX) 1521 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1522 #endif 1523 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1524 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1525 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1526 lwork = -1; 1527 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1528 #if !defined(PETSC_USE_COMPLEX) 1529 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1530 #else 1531 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1532 #endif 1533 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1534 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1535 #else /* on missing GESVD */ 1536 /* SVD */ 1537 PetscInt max_n,min_n; 1538 max_n = max_size_of_constraint; 1539 min_n = max_constraints; 1540 if (max_size_of_constraint < max_constraints) { 1541 min_n = max_size_of_constraint; 1542 max_n = max_constraints; 1543 } 1544 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1545 #if defined(PETSC_USE_COMPLEX) 1546 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1547 #endif 1548 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1549 lwork = -1; 1550 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1551 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1552 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1553 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1554 #if !defined(PETSC_USE_COMPLEX) 1555 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1556 #else 1557 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1558 #endif 1559 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1560 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1561 #endif /* on missing GESVD */ 1562 /* Allocate optimal workspace */ 1563 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1564 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1565 } 1566 /* Now we can loop on constraining sets */ 1567 total_counts = 0; 1568 temp_indices[0] = 0; 1569 /* vertices */ 1570 if (ISForVertices) { 1571 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1572 if (nnsp_has_cnst) { /* consider all vertices */ 1573 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1574 for (i=0;i<n_vertices;i++) { 1575 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1576 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1577 total_counts++; 1578 } 1579 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1580 PetscBool used_vertex; 1581 for (i=0;i<n_vertices;i++) { 1582 used_vertex = PETSC_FALSE; 1583 k = 0; 1584 while (!used_vertex && k<nnsp_size) { 1585 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1586 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1587 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1588 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1589 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1590 total_counts++; 1591 used_vertex = PETSC_TRUE; 1592 } 1593 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1594 k++; 1595 } 1596 } 1597 } 1598 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1599 n_vertices = total_counts; 1600 } 1601 1602 /* edges and faces */ 1603 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1604 if (i<n_ISForEdges) { 1605 used_IS = &ISForEdges[i]; 1606 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1607 } else { 1608 used_IS = &ISForFaces[i-n_ISForEdges]; 1609 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1610 } 1611 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1612 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1613 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1614 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1615 /* change of basis should not be performed on local periodic nodes */ 1616 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1617 if (nnsp_has_cnst) { 1618 PetscScalar quad_value; 1619 temp_constraints++; 1620 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1621 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1622 for (j=0;j<size_of_constraint;j++) { 1623 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1624 } 1625 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1626 total_counts++; 1627 } 1628 for (k=0;k<nnsp_size;k++) { 1629 PetscReal real_value; 1630 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1631 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1632 for (j=0;j<size_of_constraint;j++) { 1633 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1634 } 1635 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1636 /* check if array is null on the connected component */ 1637 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1638 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1639 if (real_value > 0.0) { /* keep indices and values */ 1640 temp_constraints++; 1641 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1642 total_counts++; 1643 } 1644 } 1645 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1646 valid_constraints = temp_constraints; 1647 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1648 if (!pcbddc->use_nnsp_true && temp_constraints) { 1649 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1650 1651 #if defined(PETSC_MISSING_LAPACK_GESVD) 1652 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1653 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1654 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1655 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1656 from that computed using LAPACKgesvd 1657 -> This is due to a different computation of eigenvectors in LAPACKheev 1658 -> The quality of the POD-computed basis will be the same */ 1659 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1660 /* Store upper triangular part of correlation matrix */ 1661 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1662 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1663 for (j=0;j<temp_constraints;j++) { 1664 for (k=0;k<j+1;k++) { 1665 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1666 } 1667 } 1668 /* compute eigenvalues and eigenvectors of correlation matrix */ 1669 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1670 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1671 #if !defined(PETSC_USE_COMPLEX) 1672 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1673 #else 1674 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1675 #endif 1676 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1677 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1678 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1679 j=0; 1680 while (j < temp_constraints && singular_vals[j] < tol) j++; 1681 total_counts=total_counts-j; 1682 valid_constraints = temp_constraints-j; 1683 /* scale and copy POD basis into used quadrature memory */ 1684 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1685 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1686 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1688 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1689 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1690 if (j<temp_constraints) { 1691 PetscInt ii; 1692 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1693 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1694 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1695 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1696 for (k=0;k<temp_constraints-j;k++) { 1697 for (ii=0;ii<size_of_constraint;ii++) { 1698 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1699 } 1700 } 1701 } 1702 #else /* on missing GESVD */ 1703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1704 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1705 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1706 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1707 #if !defined(PETSC_USE_COMPLEX) 1708 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1709 #else 1710 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1711 #endif 1712 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1713 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1714 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1715 k = temp_constraints; 1716 if (k > size_of_constraint) k = size_of_constraint; 1717 j = 0; 1718 while (j < k && singular_vals[k-j-1] < tol) j++; 1719 total_counts = total_counts-temp_constraints+k-j; 1720 valid_constraints = k-j; 1721 #endif /* on missing GESVD */ 1722 } 1723 /* setting change_of_basis flag is safe now */ 1724 if (boolforchange) { 1725 for (j=0;j<valid_constraints;j++) { 1726 PetscBTSet(change_basis,total_counts-j-1); 1727 } 1728 } 1729 } 1730 /* free index sets of faces, edges and vertices */ 1731 for (i=0;i<n_ISForFaces;i++) { 1732 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1733 } 1734 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1735 for (i=0;i<n_ISForEdges;i++) { 1736 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1737 } 1738 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1739 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1740 /* map temp_indices_to_constraint in boundary numbering */ 1741 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1742 if (i != temp_indices[total_counts]) { 1743 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1744 } 1745 1746 /* free workspace */ 1747 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1748 ierr = PetscFree(work);CHKERRQ(ierr); 1749 #if defined(PETSC_USE_COMPLEX) 1750 ierr = PetscFree(rwork);CHKERRQ(ierr); 1751 #endif 1752 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1753 #if defined(PETSC_MISSING_LAPACK_GESVD) 1754 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1755 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1756 #endif 1757 } 1758 for (k=0;k<nnsp_size;k++) { 1759 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1760 } 1761 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1762 1763 /* set quantities in pcbddc data structure and store previous primal size */ 1764 /* n_vertices defines the number of subdomain corners in the primal space */ 1765 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1766 olocal_primal_size = pcbddc->local_primal_size; 1767 pcbddc->local_primal_size = total_counts; 1768 pcbddc->n_vertices = n_vertices; 1769 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1770 1771 /* Create constraint matrix */ 1772 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1773 /* so we need to set it up properly either with or without change of basis */ 1774 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1775 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1776 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1777 /* array to compute a local numbering of constraints : vertices first then constraints */ 1778 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1779 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1780 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1781 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1782 /* auxiliary stuff for basis change */ 1783 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1784 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1785 1786 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1787 total_primal_vertices=0; 1788 for (i=0;i<pcbddc->local_primal_size;i++) { 1789 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1790 if (size_of_constraint == 1) { 1791 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1792 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1793 aux_primal_minloc[total_primal_vertices]=0; 1794 total_primal_vertices++; 1795 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1796 PetscInt min_loc,min_index; 1797 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1798 /* find first untouched local node */ 1799 k = 0; 1800 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1801 min_index = global_indices[k]; 1802 min_loc = k; 1803 /* search the minimum among global nodes already untouched on the cc */ 1804 for (k=1;k<size_of_constraint;k++) { 1805 /* there can be more than one constraint on a single connected component */ 1806 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1807 min_index = global_indices[k]; 1808 min_loc = k; 1809 } 1810 } 1811 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1812 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1813 aux_primal_minloc[total_primal_vertices]=min_loc; 1814 total_primal_vertices++; 1815 } 1816 } 1817 /* determine if a QR strategy is needed for change of basis */ 1818 qr_needed = PETSC_FALSE; 1819 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1820 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1821 if (PetscBTLookup(change_basis,i)) { 1822 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1823 j = 0; 1824 for (k=0;k<size_of_constraint;k++) { 1825 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1826 j++; 1827 } 1828 } 1829 /* found more than one primal dof on the cc */ 1830 if (j > 1) { 1831 PetscBTSet(qr_needed_idx,i); 1832 qr_needed = PETSC_TRUE; 1833 } 1834 } 1835 } 1836 /* free workspace */ 1837 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1838 1839 /* permute indices in order to have a sorted set of vertices */ 1840 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1841 1842 /* nonzero structure of constraint matrix */ 1843 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1844 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1845 j=total_primal_vertices; 1846 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1847 if (!PetscBTLookup(change_basis,i)) { 1848 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1849 j++; 1850 } 1851 } 1852 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1853 ierr = PetscFree(nnz);CHKERRQ(ierr); 1854 /* set values in constraint matrix */ 1855 for (i=0;i<total_primal_vertices;i++) { 1856 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1857 } 1858 total_counts = total_primal_vertices; 1859 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1860 if (!PetscBTLookup(change_basis,i)) { 1861 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1862 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1863 total_counts++; 1864 } 1865 } 1866 /* assembling */ 1867 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1868 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1869 /* 1870 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1871 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1872 */ 1873 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1874 if (pcbddc->use_change_of_basis) { 1875 /* dual and primal dofs on a single cc */ 1876 PetscInt dual_dofs,primal_dofs; 1877 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1878 PetscInt primal_counter; 1879 /* working stuff for GEQRF */ 1880 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1881 PetscBLASInt lqr_work; 1882 /* working stuff for UNGQR */ 1883 PetscScalar *gqr_work,lgqr_work_t; 1884 PetscBLASInt lgqr_work; 1885 /* working stuff for TRTRS */ 1886 PetscScalar *trs_rhs; 1887 PetscBLASInt Blas_NRHS; 1888 /* pointers for values insertion into change of basis matrix */ 1889 PetscInt *start_rows,*start_cols; 1890 PetscScalar *start_vals; 1891 /* working stuff for values insertion */ 1892 PetscBT is_primal; 1893 1894 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1895 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1896 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1897 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1898 /* work arrays */ 1899 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1900 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1901 /* nonzeros per row */ 1902 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1903 if (PetscBTLookup(change_basis,i)) { 1904 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1905 if (PetscBTLookup(qr_needed_idx,i)) { 1906 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1907 } else { 1908 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1909 /* get local primal index on the cc */ 1910 j = 0; 1911 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1912 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1913 } 1914 } 1915 } 1916 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1917 ierr = PetscFree(nnz);CHKERRQ(ierr); 1918 /* Set initial identity in the matrix */ 1919 for (i=0;i<pcis->n_B;i++) { 1920 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1921 } 1922 1923 if (pcbddc->dbg_flag) { 1924 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1925 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1926 } 1927 1928 1929 /* Now we loop on the constraints which need a change of basis */ 1930 /* 1931 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1932 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1933 1934 Basic blocks of change of basis matrix T computed 1935 1936 - Using the following block transformation if there is only a primal dof on the cc 1937 (in the example, primal dof is the last one of the edge in LOCAL ordering 1938 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1939 | 1 0 ... 0 1 | 1940 | 0 1 ... 0 1 | 1941 | ... | 1942 | 0 ... 1 1 | 1943 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1944 1945 - via QR decomposition of constraints otherwise 1946 */ 1947 if (qr_needed) { 1948 /* space to store Q */ 1949 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 1950 /* first we issue queries for optimal work */ 1951 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1952 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1953 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1954 lqr_work = -1; 1955 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1956 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1957 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1958 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 1959 lgqr_work = -1; 1960 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1962 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1963 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1964 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1965 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1966 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1967 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1968 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 1969 /* array to store scaling factors for reflectors */ 1970 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 1971 /* array to store rhs and solution of triangular solver */ 1972 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 1973 /* allocating workspace for check */ 1974 if (pcbddc->dbg_flag) { 1975 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 1976 } 1977 } 1978 /* array to store whether a node is primal or not */ 1979 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1980 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 1981 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1982 if (i != total_primal_vertices) { 1983 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1984 } 1985 for (i=0;i<total_primal_vertices;i++) { 1986 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1987 } 1988 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1989 1990 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1991 /* -> using implicit ordering contained in temp_indices data */ 1992 total_counts = pcbddc->n_vertices; 1993 primal_counter = total_counts; 1994 while (total_counts<pcbddc->local_primal_size) { 1995 primal_dofs = 1; 1996 if (PetscBTLookup(change_basis,total_counts)) { 1997 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1998 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1999 primal_dofs++; 2000 } 2001 /* get constraint info */ 2002 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2003 dual_dofs = size_of_constraint-primal_dofs; 2004 2005 if (pcbddc->dbg_flag) { 2006 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2007 } 2008 2009 if (primal_dofs > 1) { /* QR */ 2010 2011 /* copy quadrature constraints for change of basis check */ 2012 if (pcbddc->dbg_flag) { 2013 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2014 } 2015 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2016 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2017 2018 /* compute QR decomposition of constraints */ 2019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2020 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2021 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2022 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2023 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2024 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2025 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2026 2027 /* explictly compute R^-T */ 2028 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2029 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2030 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2031 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2032 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2033 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2034 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2035 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2036 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2037 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2038 2039 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2040 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2041 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2042 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2043 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2044 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2045 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2046 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2047 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2048 2049 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2050 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2051 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2052 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2053 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2054 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2056 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2058 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2059 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2060 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2061 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2062 2063 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2064 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2065 /* insert cols for primal dofs */ 2066 for (j=0;j<primal_dofs;j++) { 2067 start_vals = &qr_basis[j*size_of_constraint]; 2068 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2069 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2070 } 2071 /* insert cols for dual dofs */ 2072 for (j=0,k=0;j<dual_dofs;k++) { 2073 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2074 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2075 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2076 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2077 j++; 2078 } 2079 } 2080 2081 /* check change of basis */ 2082 if (pcbddc->dbg_flag) { 2083 PetscInt ii,jj; 2084 PetscBool valid_qr=PETSC_TRUE; 2085 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2090 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2091 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2092 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2093 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2094 for (jj=0;jj<size_of_constraint;jj++) { 2095 for (ii=0;ii<primal_dofs;ii++) { 2096 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2097 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2098 } 2099 } 2100 if (!valid_qr) { 2101 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2102 for (jj=0;jj<size_of_constraint;jj++) { 2103 for (ii=0;ii<primal_dofs;ii++) { 2104 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2105 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2106 } 2107 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2108 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2109 } 2110 } 2111 } 2112 } else { 2113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2114 } 2115 } 2116 } else { /* simple transformation block */ 2117 PetscInt row,col; 2118 PetscScalar val; 2119 for (j=0;j<size_of_constraint;j++) { 2120 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2121 if (!PetscBTLookup(is_primal,row)) { 2122 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2123 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2124 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2125 } else { 2126 for (k=0;k<size_of_constraint;k++) { 2127 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2128 if (row != col) { 2129 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2130 } else { 2131 val = 1.0; 2132 } 2133 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2134 } 2135 } 2136 } 2137 if (pcbddc->dbg_flag) { 2138 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2139 } 2140 } 2141 /* increment primal counter */ 2142 primal_counter += primal_dofs; 2143 } else { 2144 if (pcbddc->dbg_flag) { 2145 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2146 } 2147 } 2148 /* increment constraint counter total_counts */ 2149 total_counts += primal_dofs; 2150 } 2151 2152 /* free workspace */ 2153 if (qr_needed) { 2154 if (pcbddc->dbg_flag) { 2155 ierr = PetscFree(work);CHKERRQ(ierr); 2156 } 2157 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2158 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2159 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2160 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2161 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2162 } 2163 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2164 /* assembling */ 2165 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2166 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2167 /* 2168 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2169 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2170 */ 2171 } 2172 /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */ 2173 if (pcbddc->user_ChangeOfBasisMatrix) { 2174 PetscInt rows,cols; 2175 ierr = MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);CHKERRQ(ierr); 2176 if (rows == pcis->n && cols == pcis->n) { 2177 ierr = MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2178 } else { 2179 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2180 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2181 } 2182 } 2183 2184 /* get indices in local ordering for vertices and constraints */ 2185 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2186 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2187 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2188 } 2189 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2190 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2191 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2192 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2193 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2194 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2195 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2196 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2197 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2198 /* set quantities in PCBDDC data struct */ 2199 pcbddc->n_actual_vertices = i; 2200 /* check if a new primal space has been introduced */ 2201 pcbddc->new_primal_space_local = PETSC_TRUE; 2202 if (olocal_primal_size == pcbddc->local_primal_size) { 2203 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2204 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2205 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2206 } 2207 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2208 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2209 2210 /* flush dbg viewer */ 2211 if (pcbddc->dbg_flag) { 2212 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2213 } 2214 2215 /* free workspace */ 2216 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2217 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2218 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2219 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2220 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2221 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2222 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2223 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2224 PetscFunctionReturn(0); 2225 } 2226 2227 #undef __FUNCT__ 2228 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2229 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2230 { 2231 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2232 PC_IS *pcis = (PC_IS*)pc->data; 2233 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2234 PetscInt ierr,i,vertex_size; 2235 PetscViewer viewer=pcbddc->dbg_viewer; 2236 2237 PetscFunctionBegin; 2238 /* Reset previously computed graph */ 2239 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2240 /* Init local Graph struct */ 2241 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2242 2243 /* Check validity of the csr graph passed in by the user */ 2244 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2245 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2246 } 2247 2248 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2249 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2250 Mat mat_adj; 2251 const PetscInt *xadj,*adjncy; 2252 PetscBool flg_row=PETSC_TRUE; 2253 2254 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2255 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2256 if (!flg_row) { 2257 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2258 } 2259 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2260 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2261 if (!flg_row) { 2262 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2263 } 2264 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2265 } 2266 2267 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2268 vertex_size = 1; 2269 if (pcbddc->user_provided_isfordofs) { 2270 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2271 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2272 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2273 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2274 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2275 } 2276 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2277 pcbddc->n_ISForDofs = 0; 2278 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2279 } 2280 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2281 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2282 } else { 2283 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2284 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2285 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2286 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2287 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2288 } 2289 } 2290 } 2291 2292 /* Setup of Graph */ 2293 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2294 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2295 } 2296 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2297 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2298 } 2299 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2300 2301 /* Graph's connected components analysis */ 2302 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2303 2304 /* print some info to stdout */ 2305 if (pcbddc->dbg_flag) { 2306 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2307 } 2308 2309 /* mark topography has done */ 2310 pcbddc->recompute_topography = PETSC_FALSE; 2311 PetscFunctionReturn(0); 2312 } 2313 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2316 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2317 { 2318 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2319 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2320 PetscErrorCode ierr; 2321 2322 PetscFunctionBegin; 2323 n = 0; 2324 vertices = 0; 2325 if (pcbddc->ConstraintMatrix) { 2326 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2327 for (i=0;i<local_primal_size;i++) { 2328 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2329 if (size_of_constraint == 1) n++; 2330 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2331 } 2332 if (vertices_idx) { 2333 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2334 n = 0; 2335 for (i=0;i<local_primal_size;i++) { 2336 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2337 if (size_of_constraint == 1) { 2338 vertices[n++]=row_cmat_indices[0]; 2339 } 2340 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2341 } 2342 } 2343 } 2344 *n_vertices = n; 2345 if (vertices_idx) *vertices_idx = vertices; 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2351 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2352 { 2353 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2354 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2355 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2356 PetscBT touched; 2357 PetscErrorCode ierr; 2358 2359 /* This function assumes that the number of local constraints per connected component 2360 is not greater than the number of nodes defined for the connected component 2361 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2362 PetscFunctionBegin; 2363 n = 0; 2364 constraints_index = 0; 2365 if (pcbddc->ConstraintMatrix) { 2366 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2367 max_size_of_constraint = 0; 2368 for (i=0;i<local_primal_size;i++) { 2369 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2370 if (size_of_constraint > 1) { 2371 n++; 2372 } 2373 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2374 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2375 } 2376 if (constraints_idx) { 2377 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2378 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2379 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2380 n = 0; 2381 for (i=0;i<local_primal_size;i++) { 2382 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2383 if (size_of_constraint > 1) { 2384 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2385 /* find first untouched local node */ 2386 j = 0; 2387 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2388 min_index = row_cmat_global_indices[j]; 2389 min_loc = j; 2390 /* search the minimum among nodes not yet touched on the connected component 2391 since there can be more than one constraint on a single cc */ 2392 for (j=1;j<size_of_constraint;j++) { 2393 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2394 min_index = row_cmat_global_indices[j]; 2395 min_loc = j; 2396 } 2397 } 2398 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2399 constraints_index[n++] = row_cmat_indices[min_loc]; 2400 } 2401 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2402 } 2403 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2404 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2405 } 2406 } 2407 *n_constraints = n; 2408 if (constraints_idx) *constraints_idx = constraints_index; 2409 PetscFunctionReturn(0); 2410 } 2411 2412 /* the next two functions has been adapted from pcis.c */ 2413 #undef __FUNCT__ 2414 #define __FUNCT__ "PCBDDCApplySchur" 2415 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2416 { 2417 PetscErrorCode ierr; 2418 PC_IS *pcis = (PC_IS*)(pc->data); 2419 2420 PetscFunctionBegin; 2421 if (!vec2_B) { vec2_B = v; } 2422 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2423 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2424 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2425 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2426 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2432 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2433 { 2434 PetscErrorCode ierr; 2435 PC_IS *pcis = (PC_IS*)(pc->data); 2436 2437 PetscFunctionBegin; 2438 if (!vec2_B) { vec2_B = v; } 2439 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2440 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2441 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2442 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2443 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "PCBDDCSubsetNumbering" 2449 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2450 { 2451 Vec local_vec,global_vec; 2452 IS seqis,paris; 2453 VecScatter scatter_ctx; 2454 PetscScalar *array; 2455 PetscInt *temp_global_dofs; 2456 PetscScalar globalsum; 2457 PetscInt i,j,s; 2458 PetscInt nlocals,first_index,old_index,max_local; 2459 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2460 PetscMPIInt *dof_sizes,*dof_displs; 2461 PetscBool first_found; 2462 PetscErrorCode ierr; 2463 2464 PetscFunctionBegin; 2465 /* mpi buffers */ 2466 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2467 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2468 j = ( !rank_prec_comm ? size_prec_comm : 0); 2469 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2470 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2471 /* get maximum size of subset */ 2472 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2473 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2474 max_local = 0; 2475 for (i=0;i<n_local_dofs;i++) { 2476 if (max_local < temp_global_dofs[i] ) { 2477 max_local = temp_global_dofs[i]; 2478 } 2479 } 2480 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2481 max_global++; 2482 max_local = 0; 2483 for (i=0;i<n_local_dofs;i++) { 2484 if (max_local < local_dofs[i] ) { 2485 max_local = local_dofs[i]; 2486 } 2487 } 2488 max_local++; 2489 /* allocate workspace */ 2490 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2491 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2492 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2493 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2494 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2495 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2496 /* create scatter */ 2497 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2498 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2499 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2500 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2501 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2502 /* init array */ 2503 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2504 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2505 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2506 if (local_dofs_mult) { 2507 for (i=0;i<n_local_dofs;i++) { 2508 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2509 } 2510 } else { 2511 for (i=0;i<n_local_dofs;i++) { 2512 array[local_dofs[i]]=1.0; 2513 } 2514 } 2515 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2516 /* scatter into global vec and get total number of global dofs */ 2517 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2518 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2519 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2520 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2521 /* Fill global_vec with cumulative function for global numbering */ 2522 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2523 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2524 nlocals = 0; 2525 first_index = -1; 2526 first_found = PETSC_FALSE; 2527 for (i=0;i<s;i++) { 2528 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2529 first_found = PETSC_TRUE; 2530 first_index = i; 2531 } 2532 nlocals += (PetscInt)PetscRealPart(array[i]); 2533 } 2534 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2535 if (!rank_prec_comm) { 2536 dof_displs[0]=0; 2537 for (i=1;i<size_prec_comm;i++) { 2538 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2539 } 2540 } 2541 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2542 if (first_found) { 2543 array[first_index] += (PetscScalar)nlocals; 2544 old_index = first_index; 2545 for (i=first_index+1;i<s;i++) { 2546 if (PetscRealPart(array[i]) > 0.1) { 2547 array[i] += array[old_index]; 2548 old_index = i; 2549 } 2550 } 2551 } 2552 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2553 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2554 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2555 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2556 /* get global ordering of local dofs */ 2557 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2558 if (local_dofs_mult) { 2559 for (i=0;i<n_local_dofs;i++) { 2560 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2561 } 2562 } else { 2563 for (i=0;i<n_local_dofs;i++) { 2564 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2565 } 2566 } 2567 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2568 /* free workspace */ 2569 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2570 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2571 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2572 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2573 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2574 /* return pointer to global ordering of local dofs */ 2575 *global_numbering_subset = temp_global_dofs; 2576 PetscFunctionReturn(0); 2577 } 2578 2579 #undef __FUNCT__ 2580 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2581 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2582 { 2583 PetscInt i,j; 2584 PetscScalar *alphas; 2585 PetscErrorCode ierr; 2586 2587 PetscFunctionBegin; 2588 /* this implements stabilized Gram-Schmidt */ 2589 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2590 for (i=0;i<n;i++) { 2591 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2592 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2593 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2594 } 2595 ierr = PetscFree(alphas);CHKERRQ(ierr); 2596 PetscFunctionReturn(0); 2597 } 2598 2599 #undef __FUNCT__ 2600 #define __FUNCT__ "MatISGetMPIXAIJ" 2601 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatReuse reuse, Mat *M) 2602 { 2603 Mat_IS *matis = (Mat_IS*)(mat->data); 2604 /* info on mat */ 2605 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2606 PetscInt bs,rows,cols; 2607 PetscInt lrows,lcols; 2608 PetscInt local_rows,local_cols; 2609 PetscBool isdense,issbaij,issbaij_red; 2610 /* values insertion */ 2611 PetscScalar *array; 2612 PetscInt *local_indices,*global_indices; 2613 /* work */ 2614 PetscInt i,j,index_row; 2615 PetscErrorCode ierr; 2616 2617 PetscFunctionBegin; 2618 /* MISSING CHECKS 2619 - rectangular case not covered (it is not allowed by MATIS) 2620 */ 2621 /* get info from mat */ 2622 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2623 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2624 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2625 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2626 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2627 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2628 2629 /* work */ 2630 ierr = PetscMalloc1(local_rows,&local_indices);CHKERRQ(ierr); 2631 for (i=0;i<local_rows;i++) local_indices[i]=i; 2632 /* map indices of local mat to global values */ 2633 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2634 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2635 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2636 2637 if (issbaij) { 2638 ierr = MatGetRowUpperTriangular(matis->A);CHKERRQ(ierr); 2639 } 2640 2641 if (reuse == MAT_INITIAL_MATRIX) { 2642 Mat new_mat; 2643 MatType new_mat_type; 2644 Vec vec_dnz,vec_onz; 2645 PetscScalar *my_dnz,*my_onz; 2646 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2647 PetscInt index_col,owner; 2648 PetscMPIInt nsubdomains; 2649 2650 /* determining new matrix type */ 2651 ierr = MPI_Allreduce(&issbaij,&issbaij_red,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2652 if (issbaij_red) { 2653 new_mat_type = MATSBAIJ; 2654 } else { 2655 if (bs>1) { 2656 new_mat_type = MATBAIJ; 2657 } else { 2658 new_mat_type = MATAIJ; 2659 } 2660 } 2661 2662 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2663 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2664 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2665 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2666 ierr = MatSetType(new_mat,new_mat_type);CHKERRQ(ierr); 2667 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2668 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2669 2670 /* 2671 preallocation 2672 */ 2673 2674 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2675 /* 2676 Some vectors are needed to sum up properly on shared interface dofs. 2677 Preallocation macros cannot do the job. 2678 Note that preallocation is not exact, since it overestimates nonzeros 2679 */ 2680 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2681 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2682 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2683 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2684 /* All processes need to compute entire row ownership */ 2685 ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr); 2686 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2687 for (i=0;i<nsubdomains;i++) { 2688 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2689 row_ownership[j]=i; 2690 } 2691 } 2692 2693 /* 2694 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2695 then, they will be summed up properly. This way, preallocation is always sufficient 2696 */ 2697 ierr = PetscMalloc1(local_rows,&my_dnz);CHKERRQ(ierr); 2698 ierr = PetscMalloc1(local_rows,&my_onz);CHKERRQ(ierr); 2699 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2700 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2701 /* preallocation as a MATAIJ */ 2702 if (isdense) { /* special case for dense local matrices */ 2703 for (i=0;i<local_rows;i++) { 2704 index_row = global_indices[i]; 2705 for (j=i;j<local_rows;j++) { 2706 owner = row_ownership[index_row]; 2707 index_col = global_indices[j]; 2708 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2709 my_dnz[i] += 1.0; 2710 } else { /* offdiag block */ 2711 my_onz[i] += 1.0; 2712 } 2713 /* same as before, interchanging rows and cols */ 2714 if (i != j) { 2715 owner = row_ownership[index_col]; 2716 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2717 my_dnz[j] += 1.0; 2718 } else { 2719 my_onz[j] += 1.0; 2720 } 2721 } 2722 } 2723 } 2724 } else { 2725 for (i=0;i<local_rows;i++) { 2726 PetscInt ncols; 2727 const PetscInt *cols; 2728 index_row = global_indices[i]; 2729 ierr = MatGetRow(matis->A,i,&ncols,&cols,NULL);CHKERRQ(ierr); 2730 for (j=0;j<ncols;j++) { 2731 owner = row_ownership[index_row]; 2732 index_col = global_indices[cols[j]]; 2733 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2734 my_dnz[i] += 1.0; 2735 } else { /* offdiag block */ 2736 my_onz[i] += 1.0; 2737 } 2738 /* same as before, interchanging rows and cols */ 2739 if (issbaij) { 2740 owner = row_ownership[index_col]; 2741 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2742 my_dnz[j] += 1.0; 2743 } else { 2744 my_onz[j] += 1.0; 2745 } 2746 } 2747 } 2748 ierr = MatRestoreRow(matis->A,i,&ncols,&cols,NULL);CHKERRQ(ierr); 2749 } 2750 } 2751 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2752 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2753 if (local_rows) { /* multilevel guard */ 2754 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2755 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2756 } 2757 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2758 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2759 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2760 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2761 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2762 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2763 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2764 2765 /* set computed preallocation in dnz and onz */ 2766 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2767 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2768 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2769 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2770 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2771 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2772 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2773 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2774 2775 /* Resize preallocation if overestimated */ 2776 for (i=0;i<lrows;i++) { 2777 dnz[i] = PetscMin(dnz[i],lcols); 2778 onz[i] = PetscMin(onz[i],cols-lcols); 2779 } 2780 /* set preallocation */ 2781 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2782 for (i=0;i<lrows/bs;i++) { 2783 dnz[i] = dnz[i*bs]/bs; 2784 onz[i] = onz[i*bs]/bs; 2785 } 2786 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2787 for (i=0;i<lrows/bs;i++) { 2788 dnz[i] = dnz[i]-i; 2789 } 2790 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2791 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2792 *M = new_mat; 2793 } else { 2794 PetscInt mbs,mrows,mcols; 2795 /* some checks */ 2796 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2797 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2798 if (mrows != rows) { 2799 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2800 } 2801 if (mrows != rows) { 2802 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2803 } 2804 if (mbs != bs) { 2805 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2806 } 2807 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2808 } 2809 /* set local to global mappings */ 2810 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2811 /* Set values */ 2812 if (isdense) { /* special case for dense local matrices */ 2813 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2814 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2815 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2816 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2817 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2818 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2819 } else { /* very basic values insertion for all other matrix types */ 2820 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2821 for (i=0;i<local_rows;i++) { 2822 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2823 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2824 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2825 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2826 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2827 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2828 } 2829 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2830 } 2831 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2832 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2833 if (isdense) { 2834 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2835 } 2836 if (issbaij) { 2837 ierr = MatRestoreRowUpperTriangular(matis->A);CHKERRQ(ierr); 2838 } 2839 PetscFunctionReturn(0); 2840 } 2841 2842 #undef __FUNCT__ 2843 #define __FUNCT__ "MatISGetSubassemblingPattern" 2844 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2845 { 2846 Mat subdomain_adj; 2847 IS new_ranks,ranks_send_to; 2848 MatPartitioning partitioner; 2849 Mat_IS *matis; 2850 PetscInt n_neighs,*neighs,*n_shared,**shared; 2851 PetscInt prank; 2852 PetscMPIInt size,rank,color; 2853 PetscInt *xadj,*adjncy,*oldranks; 2854 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2855 PetscInt i,j,local_size,threshold=0; 2856 PetscErrorCode ierr; 2857 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2858 PetscSubcomm subcomm; 2859 2860 PetscFunctionBegin; 2861 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2862 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2863 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2864 2865 /* Get info on mapping */ 2866 matis = (Mat_IS*)(mat->data); 2867 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2868 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2869 2870 /* build local CSR graph of subdomains' connectivity */ 2871 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2872 xadj[0] = 0; 2873 xadj[1] = PetscMax(n_neighs-1,0); 2874 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2875 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2876 2877 if (threshold) { 2878 PetscInt* count,min_threshold; 2879 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2880 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2881 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2882 for (j=0;j<n_shared[i];j++) { 2883 count[shared[i][j]] += 1; 2884 } 2885 } 2886 /* adapt threshold since we dont want to lose significant connections */ 2887 min_threshold = n_neighs; 2888 for (i=1;i<n_neighs;i++) { 2889 for (j=0;j<n_shared[i];j++) { 2890 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2891 } 2892 } 2893 threshold = PetscMax(min_threshold+1,threshold); 2894 xadj[1] = 0; 2895 for (i=1;i<n_neighs;i++) { 2896 for (j=0;j<n_shared[i];j++) { 2897 if (count[shared[i][j]] < threshold) { 2898 adjncy[xadj[1]] = neighs[i]; 2899 adjncy_wgt[xadj[1]] = n_shared[i]; 2900 xadj[1]++; 2901 break; 2902 } 2903 } 2904 } 2905 ierr = PetscFree(count);CHKERRQ(ierr); 2906 } else { 2907 if (xadj[1]) { 2908 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2909 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2910 } 2911 } 2912 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2913 if (use_square) { 2914 for (i=0;i<xadj[1];i++) { 2915 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2916 } 2917 } 2918 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2919 2920 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2921 2922 /* 2923 Restrict work on active processes only. 2924 */ 2925 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2926 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2927 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2928 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2929 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2930 if (color) { 2931 ierr = PetscFree(xadj);CHKERRQ(ierr); 2932 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2933 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2934 } else { 2935 PetscInt coarsening_ratio; 2936 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2937 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2938 prank = rank; 2939 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2940 /* 2941 for (i=0;i<size;i++) { 2942 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2943 } 2944 */ 2945 for (i=0;i<xadj[1];i++) { 2946 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2947 } 2948 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2949 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2950 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2951 2952 /* Partition */ 2953 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2954 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2955 if (use_vwgt) { 2956 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2957 v_wgt[0] = local_size; 2958 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2959 } 2960 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2961 coarsening_ratio = size/n_subdomains; 2962 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2963 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2964 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2965 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2966 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2967 2968 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2969 if (contiguous) { 2970 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2971 } else { 2972 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2973 } 2974 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2975 /* clean up */ 2976 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2977 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2978 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2979 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2980 } 2981 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2982 2983 /* assemble parallel IS for sends */ 2984 i = 1; 2985 if (color) i=0; 2986 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2987 2988 /* get back IS */ 2989 *is_sends = ranks_send_to; 2990 PetscFunctionReturn(0); 2991 } 2992 2993 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2994 2995 #undef __FUNCT__ 2996 #define __FUNCT__ "MatISSubassemble" 2997 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2998 { 2999 Mat local_mat; 3000 Mat_IS *matis; 3001 IS is_sends_internal; 3002 PetscInt rows,cols; 3003 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3004 PetscBool ismatis,isdense,destroy_mat; 3005 ISLocalToGlobalMapping l2gmap; 3006 PetscInt* l2gmap_indices; 3007 const PetscInt* is_indices; 3008 MatType new_local_type; 3009 /* buffers */ 3010 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3011 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3012 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3013 /* MPI */ 3014 MPI_Comm comm,comm_n; 3015 PetscSubcomm subcomm; 3016 PetscMPIInt n_sends,n_recvs,commsize; 3017 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3018 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3019 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3020 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3021 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3022 PetscErrorCode ierr; 3023 3024 PetscFunctionBegin; 3025 /* TODO: add missing checks */ 3026 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3027 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3028 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3029 PetscValidLogicalCollectiveInt(mat,nis,7); 3030 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3031 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3032 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3033 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3034 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3035 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3036 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3037 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3038 PetscInt mrows,mcols,mnrows,mncols; 3039 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3040 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3041 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3042 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3043 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3044 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3045 } 3046 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3047 PetscValidLogicalCollectiveInt(mat,bs,0); 3048 /* prepare IS for sending if not provided */ 3049 if (!is_sends) { 3050 PetscBool pcontig = PETSC_TRUE; 3051 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3052 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3053 } else { 3054 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3055 is_sends_internal = is_sends; 3056 } 3057 3058 /* get pointer of MATIS data */ 3059 matis = (Mat_IS*)mat->data; 3060 3061 /* get comm */ 3062 comm = PetscObjectComm((PetscObject)mat); 3063 3064 /* compute number of sends */ 3065 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3066 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3067 3068 /* compute number of receives */ 3069 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3070 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3071 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3072 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3073 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3074 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3075 ierr = PetscFree(iflags);CHKERRQ(ierr); 3076 3077 /* restrict comm if requested */ 3078 subcomm = 0; 3079 destroy_mat = PETSC_FALSE; 3080 if (restrict_comm) { 3081 PetscMPIInt color,rank,subcommsize; 3082 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3083 color = 0; 3084 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3085 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3086 subcommsize = commsize - subcommsize; 3087 /* check if reuse has been requested */ 3088 if (reuse == MAT_REUSE_MATRIX) { 3089 if (*mat_n) { 3090 PetscMPIInt subcommsize2; 3091 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3092 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3093 comm_n = PetscObjectComm((PetscObject)*mat_n); 3094 } else { 3095 comm_n = PETSC_COMM_SELF; 3096 } 3097 } else { /* MAT_INITIAL_MATRIX */ 3098 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3099 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3100 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3101 comm_n = subcomm->comm; 3102 } 3103 /* flag to destroy *mat_n if not significative */ 3104 if (color) destroy_mat = PETSC_TRUE; 3105 } else { 3106 comm_n = comm; 3107 } 3108 3109 /* prepare send/receive buffers */ 3110 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3111 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3112 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3113 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3114 if (nis) { 3115 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3116 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3117 } 3118 3119 /* Get data from local matrices */ 3120 if (!isdense) { 3121 /* TODO: See below some guidelines on how to prepare the local buffers */ 3122 /* 3123 send_buffer_vals should contain the raw values of the local matrix 3124 send_buffer_idxs should contain: 3125 - MatType_PRIVATE type 3126 - PetscInt size_of_l2gmap 3127 - PetscInt global_row_indices[size_of_l2gmap] 3128 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3129 */ 3130 } else { 3131 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3132 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3133 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 3134 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3135 send_buffer_idxs[1] = i; 3136 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3137 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3138 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3139 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3140 for (i=0;i<n_sends;i++) { 3141 ilengths_vals[is_indices[i]] = len*len; 3142 ilengths_idxs[is_indices[i]] = len+2; 3143 } 3144 } 3145 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3146 /* additional is (if any) */ 3147 if (nis) { 3148 PetscMPIInt psum; 3149 PetscInt j; 3150 for (j=0,psum=0;j<nis;j++) { 3151 PetscInt plen; 3152 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3153 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3154 psum += len+1; /* indices + lenght */ 3155 } 3156 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3157 for (j=0,psum=0;j<nis;j++) { 3158 PetscInt plen; 3159 const PetscInt *is_array_idxs; 3160 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3161 send_buffer_idxs_is[psum] = plen; 3162 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3163 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3164 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3165 psum += plen+1; /* indices + lenght */ 3166 } 3167 for (i=0;i<n_sends;i++) { 3168 ilengths_idxs_is[is_indices[i]] = psum; 3169 } 3170 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3171 } 3172 3173 buf_size_idxs = 0; 3174 buf_size_vals = 0; 3175 buf_size_idxs_is = 0; 3176 for (i=0;i<n_recvs;i++) { 3177 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3178 buf_size_vals += (PetscInt)olengths_vals[i]; 3179 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3180 } 3181 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3182 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3183 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3184 3185 /* get new tags for clean communications */ 3186 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3187 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3188 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3189 3190 /* allocate for requests */ 3191 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3192 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3193 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3194 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3195 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3196 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3197 3198 /* communications */ 3199 ptr_idxs = recv_buffer_idxs; 3200 ptr_vals = recv_buffer_vals; 3201 ptr_idxs_is = recv_buffer_idxs_is; 3202 for (i=0;i<n_recvs;i++) { 3203 source_dest = onodes[i]; 3204 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3205 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3206 ptr_idxs += olengths_idxs[i]; 3207 ptr_vals += olengths_vals[i]; 3208 if (nis) { 3209 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3210 ptr_idxs_is += olengths_idxs_is[i]; 3211 } 3212 } 3213 for (i=0;i<n_sends;i++) { 3214 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3215 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3216 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3217 if (nis) { 3218 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3219 } 3220 } 3221 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3222 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3223 3224 /* assemble new l2g map */ 3225 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3226 ptr_idxs = recv_buffer_idxs; 3227 buf_size_idxs = 0; 3228 for (i=0;i<n_recvs;i++) { 3229 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3230 ptr_idxs += olengths_idxs[i]; 3231 } 3232 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 3233 ptr_idxs = recv_buffer_idxs; 3234 buf_size_idxs = 0; 3235 for (i=0;i<n_recvs;i++) { 3236 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3237 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3238 ptr_idxs += olengths_idxs[i]; 3239 } 3240 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3241 ierr = ISLocalToGlobalMappingCreate(comm,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3242 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3243 3244 /* infer new local matrix type from received local matrices type */ 3245 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3246 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3247 if (n_recvs) { 3248 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3249 ptr_idxs = recv_buffer_idxs; 3250 for (i=0;i<n_recvs;i++) { 3251 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3252 new_local_type_private = MATAIJ_PRIVATE; 3253 break; 3254 } 3255 ptr_idxs += olengths_idxs[i]; 3256 } 3257 switch (new_local_type_private) { 3258 case MATDENSE_PRIVATE: 3259 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3260 new_local_type = MATSEQAIJ; 3261 bs = 1; 3262 } else { /* if I receive only 1 dense matrix */ 3263 new_local_type = MATSEQDENSE; 3264 bs = 1; 3265 } 3266 break; 3267 case MATAIJ_PRIVATE: 3268 new_local_type = MATSEQAIJ; 3269 bs = 1; 3270 break; 3271 case MATBAIJ_PRIVATE: 3272 new_local_type = MATSEQBAIJ; 3273 break; 3274 case MATSBAIJ_PRIVATE: 3275 new_local_type = MATSEQSBAIJ; 3276 break; 3277 default: 3278 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3279 break; 3280 } 3281 } else { /* by default, new_local_type is seqdense */ 3282 new_local_type = MATSEQDENSE; 3283 bs = 1; 3284 } 3285 3286 /* create MATIS object if needed */ 3287 if (reuse == MAT_INITIAL_MATRIX) { 3288 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3289 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3290 } else { 3291 /* it also destroys the local matrices */ 3292 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3293 } 3294 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3295 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3296 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3297 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3298 3299 /* set values */ 3300 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3301 ptr_vals = recv_buffer_vals; 3302 ptr_idxs = recv_buffer_idxs; 3303 for (i=0;i<n_recvs;i++) { 3304 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3305 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3306 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3307 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3308 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3309 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3310 } else { 3311 /* TODO */ 3312 } 3313 ptr_idxs += olengths_idxs[i]; 3314 ptr_vals += olengths_vals[i]; 3315 } 3316 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3317 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3318 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3319 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3320 3321 #if 0 3322 if (!restrict_comm) { /* check */ 3323 Vec lvec,rvec; 3324 PetscReal infty_error; 3325 3326 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3327 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3328 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3329 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3330 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3331 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3332 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3333 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3334 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3335 } 3336 #endif 3337 3338 /* assemble new additional is (if any) */ 3339 if (nis) { 3340 PetscInt **temp_idxs,*count_is,j,psum; 3341 3342 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3343 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3344 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3345 ptr_idxs = recv_buffer_idxs_is; 3346 psum = 0; 3347 for (i=0;i<n_recvs;i++) { 3348 for (j=0;j<nis;j++) { 3349 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3350 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3351 psum += plen; 3352 ptr_idxs += plen+1; /* shift pointer to received data */ 3353 } 3354 } 3355 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3356 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3357 for (i=1;i<nis;i++) { 3358 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3359 } 3360 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3361 ptr_idxs = recv_buffer_idxs_is; 3362 for (i=0;i<n_recvs;i++) { 3363 for (j=0;j<nis;j++) { 3364 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3365 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3366 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3367 ptr_idxs += plen+1; /* shift pointer to received data */ 3368 } 3369 } 3370 for (i=0;i<nis;i++) { 3371 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3372 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3373 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3374 } 3375 ierr = PetscFree(count_is);CHKERRQ(ierr); 3376 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3377 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3378 } 3379 /* free workspace */ 3380 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3381 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3382 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3383 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3384 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3385 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3386 if (isdense) { 3387 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3388 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3389 } else { 3390 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3391 } 3392 if (nis) { 3393 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3394 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3395 } 3396 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3397 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3398 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3399 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3400 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3401 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3402 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3403 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3404 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3405 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3406 ierr = PetscFree(onodes);CHKERRQ(ierr); 3407 if (nis) { 3408 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3409 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3410 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3411 } 3412 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3413 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3414 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3415 for (i=0;i<nis;i++) { 3416 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3417 } 3418 } 3419 PetscFunctionReturn(0); 3420 } 3421 3422 /* temporary hack into ksp private data structure */ 3423 #include <petsc-private/kspimpl.h> 3424 3425 #undef __FUNCT__ 3426 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3427 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3428 { 3429 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3430 PC_IS *pcis = (PC_IS*)pc->data; 3431 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3432 MatNullSpace CoarseNullSpace=NULL; 3433 ISLocalToGlobalMapping coarse_islg; 3434 IS coarse_is,*isarray; 3435 PetscInt i,im_active=-1,active_procs=-1; 3436 PetscInt nis,nisdofs,nisneu; 3437 PC pc_temp; 3438 PCType coarse_pc_type; 3439 KSPType coarse_ksp_type; 3440 PetscBool multilevel_requested,multilevel_allowed; 3441 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3442 Mat t_coarse_mat_is; 3443 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3444 PetscMPIInt all_procs; 3445 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3446 PetscBool compute_vecs = PETSC_FALSE; 3447 PetscErrorCode ierr; 3448 3449 PetscFunctionBegin; 3450 /* Assign global numbering to coarse dofs */ 3451 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3452 compute_vecs = PETSC_TRUE; 3453 PetscInt ocoarse_size; 3454 ocoarse_size = pcbddc->coarse_size; 3455 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3456 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3457 /* see if we can avoid some work */ 3458 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3459 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3460 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3461 coarse_reuse = PETSC_FALSE; 3462 } else { /* we can safely reuse already computed coarse matrix */ 3463 coarse_reuse = PETSC_TRUE; 3464 } 3465 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3466 coarse_reuse = PETSC_FALSE; 3467 } 3468 /* reset any subassembling information */ 3469 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3470 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3471 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3472 coarse_reuse = PETSC_TRUE; 3473 } 3474 3475 /* count "active" (i.e. with positive local size) and "void" processes */ 3476 im_active = !!(pcis->n); 3477 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3478 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3479 void_procs = all_procs-active_procs; 3480 csin_type_simple = PETSC_TRUE; 3481 if (pcbddc->current_level) { 3482 csin_ml = PETSC_TRUE; 3483 ncoarse_ml = void_procs; 3484 csin_ds = PETSC_TRUE; 3485 ncoarse_ds = void_procs; 3486 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3487 } else { 3488 csin_ml = PETSC_FALSE; 3489 ncoarse_ml = all_procs; 3490 if (void_procs) { 3491 csin_ds = PETSC_TRUE; 3492 ncoarse_ds = void_procs; 3493 csin_type_simple = PETSC_FALSE; 3494 } else { 3495 csin_ds = PETSC_FALSE; 3496 ncoarse_ds = all_procs; 3497 } 3498 } 3499 3500 /* 3501 test if we can go multilevel: three conditions must be satisfied: 3502 - we have not exceeded the number of levels requested 3503 - we can actually subassemble the active processes 3504 - we can find a suitable number of MPI processes where we can place the subassembled problem 3505 */ 3506 multilevel_allowed = PETSC_FALSE; 3507 multilevel_requested = PETSC_FALSE; 3508 if (pcbddc->current_level < pcbddc->max_levels) { 3509 multilevel_requested = PETSC_TRUE; 3510 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3511 multilevel_allowed = PETSC_FALSE; 3512 } else { 3513 multilevel_allowed = PETSC_TRUE; 3514 } 3515 } 3516 /* determine number of process partecipating to coarse solver */ 3517 if (multilevel_allowed) { 3518 ncoarse = ncoarse_ml; 3519 csin = csin_ml; 3520 } else { 3521 ncoarse = ncoarse_ds; 3522 csin = csin_ds; 3523 } 3524 3525 /* creates temporary l2gmap and IS for coarse indexes */ 3526 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3527 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3528 3529 /* creates temporary MATIS object for coarse matrix */ 3530 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3531 #if 0 3532 { 3533 PetscViewer viewer; 3534 char filename[256]; 3535 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3536 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3537 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3538 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3539 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3540 } 3541 #endif 3542 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3543 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3544 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3545 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3546 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3547 3548 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3549 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3550 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3551 const PetscInt *idxs; 3552 ISLocalToGlobalMapping tmap; 3553 3554 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3555 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3556 /* allocate space for temporary storage */ 3557 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3558 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3559 /* allocate for IS array */ 3560 nisdofs = pcbddc->n_ISForDofsLocal; 3561 nisneu = !!pcbddc->NeumannBoundariesLocal; 3562 nis = nisdofs + nisneu; 3563 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3564 /* dofs splitting */ 3565 for (i=0;i<nisdofs;i++) { 3566 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3567 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3568 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3569 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3570 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3571 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3572 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3573 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3574 } 3575 /* neumann boundaries */ 3576 if (pcbddc->NeumannBoundariesLocal) { 3577 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3578 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3579 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3580 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3581 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3582 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3583 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3584 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3585 } 3586 /* free memory */ 3587 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3588 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3589 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3590 } else { 3591 nis = 0; 3592 nisdofs = 0; 3593 nisneu = 0; 3594 isarray = NULL; 3595 } 3596 /* destroy no longer needed map */ 3597 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3598 3599 /* restrict on coarse candidates (if needed) */ 3600 coarse_mat_is = NULL; 3601 if (csin) { 3602 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3603 PetscInt j,tissize,*nisindices; 3604 PetscInt *coarse_candidates; 3605 const PetscInt* tisindices; 3606 /* get coarse candidates' ranks in pc communicator */ 3607 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3608 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3609 for (i=0,j=0;i<all_procs;i++) { 3610 if (!coarse_candidates[i]) { 3611 coarse_candidates[j]=i; 3612 j++; 3613 } 3614 } 3615 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3616 /* get a suitable subassembling pattern */ 3617 if (csin_type_simple) { 3618 PetscMPIInt rank; 3619 PetscInt issize,isidx; 3620 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3621 if (im_active) { 3622 issize = 1; 3623 isidx = (PetscInt)rank; 3624 } else { 3625 issize = 0; 3626 isidx = -1; 3627 } 3628 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3629 } else { 3630 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3631 } 3632 if (pcbddc->dbg_flag) { 3633 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3634 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3635 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3636 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3637 for (i=0;i<j;i++) { 3638 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3639 } 3640 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3641 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3642 } 3643 /* shift the pattern on coarse candidates */ 3644 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3645 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3646 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3647 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3648 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3649 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3650 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3651 } 3652 if (pcbddc->dbg_flag) { 3653 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3654 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3655 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3656 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3657 } 3658 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3659 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3660 } else { 3661 if (pcbddc->dbg_flag) { 3662 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3663 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3664 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3665 } 3666 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3667 coarse_mat_is = t_coarse_mat_is; 3668 } 3669 3670 /* create local to global scatters for coarse problem */ 3671 if (compute_vecs) { 3672 PetscInt lrows; 3673 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3674 if (coarse_mat_is) { 3675 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3676 } else { 3677 lrows = 0; 3678 } 3679 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3680 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3681 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3682 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3683 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3684 } 3685 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3686 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3687 3688 /* set defaults for coarse KSP and PC */ 3689 if (multilevel_allowed) { 3690 coarse_ksp_type = KSPRICHARDSON; 3691 coarse_pc_type = PCBDDC; 3692 } else { 3693 coarse_ksp_type = KSPPREONLY; 3694 coarse_pc_type = PCREDUNDANT; 3695 } 3696 3697 /* print some info if requested */ 3698 if (pcbddc->dbg_flag) { 3699 if (!multilevel_allowed) { 3700 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3701 if (multilevel_requested) { 3702 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3703 } else if (pcbddc->max_levels) { 3704 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3705 } 3706 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3707 } 3708 } 3709 3710 /* create the coarse KSP object only once with defaults */ 3711 if (coarse_mat_is) { 3712 MatReuse coarse_mat_reuse; 3713 PetscViewer dbg_viewer; 3714 if (pcbddc->dbg_flag) { 3715 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3716 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3717 } 3718 if (!pcbddc->coarse_ksp) { 3719 char prefix[256],str_level[16]; 3720 size_t len; 3721 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3722 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3723 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3724 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3725 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3726 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3727 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3728 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3729 /* prefix */ 3730 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3731 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3732 if (!pcbddc->current_level) { 3733 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3734 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3735 } else { 3736 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3737 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3738 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3739 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3740 *(prefix+len)='\0'; 3741 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3742 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3743 } 3744 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3745 /* allow user customization */ 3746 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3747 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3748 } 3749 3750 /* get some info after set from options */ 3751 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3752 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3753 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3754 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3755 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3756 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3757 isbddc = PETSC_FALSE; 3758 } 3759 if (isredundant) { 3760 KSP inner_ksp; 3761 PC inner_pc; 3762 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3763 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3764 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3765 } 3766 3767 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3768 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3769 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3770 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3771 if (nisdofs) { 3772 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3773 for (i=0;i<nisdofs;i++) { 3774 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3775 } 3776 } 3777 if (nisneu) { 3778 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3779 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3780 } 3781 3782 /* assemble coarse matrix */ 3783 if (coarse_reuse) { 3784 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3785 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3786 coarse_mat_reuse = MAT_REUSE_MATRIX; 3787 } else { 3788 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3789 } 3790 if (isbddc || isnn) { 3791 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3792 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3793 if (pcbddc->dbg_flag) { 3794 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3795 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3796 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3797 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3798 } 3799 } 3800 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3801 } else { 3802 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3803 } 3804 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3805 3806 /* propagate symmetry info to coarse matrix */ 3807 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3808 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3809 3810 /* set operators */ 3811 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3812 if (pcbddc->dbg_flag) { 3813 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3814 } 3815 } else { /* processes non partecipating to coarse solver (if any) */ 3816 coarse_mat = 0; 3817 } 3818 ierr = PetscFree(isarray);CHKERRQ(ierr); 3819 #if 0 3820 { 3821 PetscViewer viewer; 3822 char filename[256]; 3823 sprintf(filename,"coarse_mat.m"); 3824 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3825 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3826 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3827 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3828 } 3829 #endif 3830 3831 /* Compute coarse null space (special handling by BDDC only) */ 3832 if (pcbddc->NullSpace) { 3833 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3834 } 3835 3836 if (pcbddc->coarse_ksp) { 3837 Vec crhs,csol; 3838 PetscBool ispreonly; 3839 if (CoarseNullSpace) { 3840 if (isbddc) { 3841 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3842 } else { 3843 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3844 } 3845 } 3846 /* setup coarse ksp */ 3847 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3848 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3849 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3850 /* hack */ 3851 if (!csol) { 3852 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3853 } 3854 if (!crhs) { 3855 ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3856 } 3857 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3858 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3859 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3860 KSP check_ksp; 3861 KSPType check_ksp_type; 3862 PC check_pc; 3863 Vec check_vec,coarse_vec; 3864 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3865 PetscInt its; 3866 PetscBool compute_eigs; 3867 PetscReal *eigs_r,*eigs_c; 3868 PetscInt neigs; 3869 const char *prefix; 3870 3871 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3872 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3873 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3874 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3875 if (ispreonly) { 3876 check_ksp_type = KSPPREONLY; 3877 compute_eigs = PETSC_FALSE; 3878 } else { 3879 check_ksp_type = KSPGMRES; 3880 compute_eigs = PETSC_TRUE; 3881 } 3882 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3883 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3884 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3885 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3886 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3887 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3888 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3889 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3890 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3891 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3892 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3893 /* create random vec */ 3894 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3895 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3896 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3897 if (CoarseNullSpace) { 3898 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3899 } 3900 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3901 /* solve coarse problem */ 3902 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3903 if (CoarseNullSpace) { 3904 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3905 } 3906 /* set eigenvalue estimation if preonly has not been requested */ 3907 if (compute_eigs) { 3908 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3909 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3910 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3911 lambda_max = eigs_r[neigs-1]; 3912 lambda_min = eigs_r[0]; 3913 if (pcbddc->use_coarse_estimates) { 3914 if (lambda_max>lambda_min) { 3915 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3916 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3917 } 3918 } 3919 } 3920 3921 /* check coarse problem residual error */ 3922 if (pcbddc->dbg_flag) { 3923 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3924 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3925 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3926 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3927 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3928 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3929 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3930 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3931 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3932 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3933 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3934 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3935 if (compute_eigs) { 3936 PetscReal lambda_max_s,lambda_min_s; 3937 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3938 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3939 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3940 for (i=0;i<neigs;i++) { 3941 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3942 } 3943 } 3944 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3945 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3946 } 3947 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3948 if (compute_eigs) { 3949 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3950 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3951 } 3952 } 3953 } 3954 /* print additional info */ 3955 if (pcbddc->dbg_flag) { 3956 /* waits until all processes reaches this point */ 3957 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3958 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3960 } 3961 3962 /* free memory */ 3963 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3964 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3965 PetscFunctionReturn(0); 3966 } 3967 3968 #undef __FUNCT__ 3969 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3970 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3971 { 3972 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3973 PC_IS* pcis = (PC_IS*)pc->data; 3974 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3975 PetscInt i,coarse_size; 3976 PetscInt *local_primal_indices; 3977 PetscErrorCode ierr; 3978 3979 PetscFunctionBegin; 3980 /* Compute global number of coarse dofs */ 3981 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3982 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3983 } 3984 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3985 3986 /* check numbering */ 3987 if (pcbddc->dbg_flag) { 3988 PetscScalar coarsesum,*array; 3989 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 3990 3991 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3992 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3993 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3994 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3995 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3996 for (i=0;i<pcbddc->local_primal_size;i++) { 3997 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3998 } 3999 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4000 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4001 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4002 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4003 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4004 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4005 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4006 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4007 for (i=0;i<pcis->n;i++) { 4008 if (array[i] == 1.0) { 4009 set_error = PETSC_TRUE; 4010 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4011 } 4012 } 4013 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4014 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4015 for (i=0;i<pcis->n;i++) { 4016 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4017 } 4018 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4019 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4020 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4021 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4022 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4023 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4024 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4025 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4026 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4027 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4028 for (i=0;i<pcbddc->local_primal_size;i++) { 4029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4030 } 4031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4032 } 4033 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4034 if (set_error_reduced) { 4035 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4036 } 4037 } 4038 /* get back data */ 4039 *coarse_size_n = coarse_size; 4040 *local_primal_indices_n = local_primal_indices; 4041 PetscFunctionReturn(0); 4042 } 4043 4044 #undef __FUNCT__ 4045 #define __FUNCT__ "PCBDDCGlobalToLocal" 4046 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4047 { 4048 IS localis_t; 4049 PetscInt i,lsize,*idxs,n; 4050 PetscScalar *vals; 4051 PetscErrorCode ierr; 4052 4053 PetscFunctionBegin; 4054 /* get indices in local ordering exploiting local to global map */ 4055 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4056 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 4057 for (i=0;i<lsize;i++) vals[i] = 1.0; 4058 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4059 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4060 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4061 if (idxs) { /* multilevel guard */ 4062 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4063 } 4064 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4065 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4066 ierr = PetscFree(vals);CHKERRQ(ierr); 4067 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4068 /* now compute set in local ordering */ 4069 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4070 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4071 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4072 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4073 for (i=0,lsize=0;i<n;i++) { 4074 if (PetscRealPart(vals[i]) > 0.5) { 4075 lsize++; 4076 } 4077 } 4078 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 4079 for (i=0,lsize=0;i<n;i++) { 4080 if (PetscRealPart(vals[i]) > 0.5) { 4081 idxs[lsize++] = i; 4082 } 4083 } 4084 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4085 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4086 *localis = localis_t; 4087 PetscFunctionReturn(0); 4088 } 4089