1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 104 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 117 { 118 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 119 PC_IS *pcis = (PC_IS*)pc->data; 120 VecType impVecType; 121 PetscInt n_constraints,n_R,old_size; 122 PetscErrorCode ierr; 123 124 PetscFunctionBegin; 125 if (!pcbddc->ConstraintMatrix) { 126 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 127 } 128 /* get sizes */ 129 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 130 n_R = pcis->n-pcbddc->n_actual_vertices; 131 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 132 /* local work vectors (try to avoid unneeded work)*/ 133 /* R nodes */ 134 old_size = -1; 135 if (pcbddc->vec1_R) { 136 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 137 } 138 if (n_R != old_size) { 139 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 140 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 141 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 143 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 144 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 145 } 146 /* local primal dofs */ 147 old_size = -1; 148 if (pcbddc->vec1_P) { 149 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 150 } 151 if (pcbddc->local_primal_size != old_size) { 152 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 153 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 155 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 156 } 157 /* local explicit constraints */ 158 old_size = -1; 159 if (pcbddc->vec1_C) { 160 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 161 } 162 if (n_constraints && n_constraints != old_size) { 163 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 164 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 166 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNCT__ 172 #define __FUNCT__ "PCBDDCSetUpCorrection" 173 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 174 { 175 PetscErrorCode ierr; 176 /* pointers to pcis and pcbddc */ 177 PC_IS* pcis = (PC_IS*)pc->data; 178 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 179 /* submatrices of local problem */ 180 Mat A_RV,A_VR,A_VV; 181 /* working matrices */ 182 Mat M1,M2,M3,C_CR; 183 /* working vectors */ 184 Vec vec1_C,vec2_C,vec1_V,vec2_V; 185 /* additional working stuff */ 186 IS is_aux; 187 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 188 const PetscScalar *array,*row_cmat_values; 189 const PetscInt *row_cmat_indices,*idx_R_local; 190 PetscInt *idx_V_B,*auxindices; 191 PetscInt n_vertices,n_constraints,size_of_constraint; 192 PetscInt i,j,n_R,n_D,n_B; 193 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 194 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 195 MatType impMatType; 196 /* some shortcuts to scalars */ 197 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 198 /* for debugging purposes */ 199 PetscReal *coarsefunctions_errors,*constraints_errors; 200 201 PetscFunctionBegin; 202 /* get number of vertices (corners plus constraints with change of basis) 203 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 204 n_vertices = pcbddc->n_actual_vertices; 205 n_constraints = pcbddc->local_primal_size-n_vertices; 206 /* Set Non-overlapping dimensions */ 207 n_B = pcis->n_B; n_D = pcis->n - n_B; 208 n_R = pcis->n-n_vertices; 209 210 /* Set types for local objects needed by BDDC precondtioner */ 211 impMatType = MATSEQDENSE; 212 213 /* Allocating some extra storage just to be safe */ 214 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 215 for (i=0;i<pcis->n;i++) auxindices[i]=i; 216 217 /* vertices in boundary numbering */ 218 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 219 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 220 if (i != n_vertices) { 221 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 222 } 223 224 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 225 if (n_constraints) { 226 /* see if we can save some allocations */ 227 if (pcbddc->local_auxmat2) { 228 PetscInt on_R,on_constraints; 229 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 230 if (on_R != n_R || on_constraints != n_constraints) { 231 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 232 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 233 } 234 } 235 /* work vectors */ 236 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 238 /* auxiliary matrices */ 239 if (!pcbddc->local_auxmat2) { 240 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 241 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 242 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 243 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 244 } 245 246 /* Extract constraints on R nodes: C_{CR} */ 247 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 248 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 249 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 250 251 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 252 for (i=0;i<n_constraints;i++) { 253 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 254 /* Get row of constraint matrix in R numbering */ 255 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 256 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 257 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 259 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 260 /* Solve for row of constraint matrix in R numbering */ 261 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 262 /* Set values in local_auxmat2 */ 263 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 264 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 265 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 } 267 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 268 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 270 271 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 272 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 273 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 274 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 275 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 276 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 277 ierr = MatSetUp(M1);CHKERRQ(ierr); 278 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 279 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 280 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 281 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 282 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 283 ierr = MatDestroy(&M2);CHKERRQ(ierr); 284 ierr = MatDestroy(&M3);CHKERRQ(ierr); 285 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 286 if (!pcbddc->local_auxmat1) { 287 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 288 } else { 289 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } 291 } 292 293 /* Get submatrices from subdomain matrix */ 294 if (n_vertices) { 295 PetscInt ibs,mbs; 296 PetscBool issbaij; 297 Mat newmat; 298 299 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 300 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 301 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 302 if (ibs != mbs) { /* need to convert to SEQAIJ */ 303 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 304 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 307 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 308 } else { 309 /* this is safe */ 310 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 311 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 312 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 313 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 314 /* which of the two approaches is faster? */ 315 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 316 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 317 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 319 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 320 } else { 321 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 322 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 } 324 } 325 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 326 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 327 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 328 } 329 330 /* Matrix of coarse basis functions (local) */ 331 if (pcbddc->coarse_phi_B) { 332 PetscInt on_B,on_primal; 333 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 334 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 335 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 336 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 337 } 338 } 339 if (pcbddc->coarse_phi_D) { 340 PetscInt on_D,on_primal; 341 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 342 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 343 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 344 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 345 } 346 } 347 if (!pcbddc->coarse_phi_B) { 348 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 349 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 350 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 351 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 352 } 353 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 354 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 355 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 356 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 357 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 358 } 359 360 if (pcbddc->dbg_flag) { 361 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 362 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 363 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 364 } 365 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 366 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 367 368 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 369 370 /* vertices */ 371 for (i=0;i<n_vertices;i++) { 372 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 373 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 374 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 375 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 376 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 378 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 379 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 380 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 382 if (n_constraints) { 383 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 384 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 386 } 387 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 388 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 389 390 /* Set values in coarse basis function and subdomain part of coarse_mat */ 391 /* coarse basis functions */ 392 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 393 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 394 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 396 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 397 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 399 if (pcbddc->switch_static || pcbddc->dbg_flag) { 400 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 401 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 403 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 } 406 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 407 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 408 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 if (n_constraints) { 411 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 412 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 } 415 416 /* check */ 417 if (pcbddc->dbg_flag) { 418 /* assemble subdomain vector on local nodes */ 419 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 420 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 421 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 425 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 426 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 427 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 428 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 429 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 430 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 if (n_constraints) { 432 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 433 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 } 436 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 439 /* check saddle point solution */ 440 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 441 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 443 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 444 /* shift by the identity matrix */ 445 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 449 } 450 } 451 452 /* constraints */ 453 for (i=0;i<n_constraints;i++) { 454 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 455 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 456 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 457 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 458 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 459 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 460 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 461 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 462 if (n_vertices) { 463 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 464 } 465 /* Set values in coarse basis function and subdomain part of coarse_mat */ 466 /* coarse basis functions */ 467 j = i+n_vertices; /* don't touch this! */ 468 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 469 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 472 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 473 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 if (pcbddc->switch_static || pcbddc->dbg_flag) { 475 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 478 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 479 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 } 481 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 482 if (n_vertices) { 483 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 484 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 485 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 } 487 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 488 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 491 if (pcbddc->dbg_flag) { 492 /* assemble subdomain vector on nodes */ 493 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 494 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 495 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 499 /* assemble subdomain vector of lagrange multipliers */ 500 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 501 if (n_vertices) { 502 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 503 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 504 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 } 506 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 507 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 510 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 512 /* check saddle point solution */ 513 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 514 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 516 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 517 /* shift by the identity matrix */ 518 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 519 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 522 } 523 } 524 /* call assembling routines for local coarse basis */ 525 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 526 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 if (pcbddc->switch_static || pcbddc->dbg_flag) { 528 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 } 531 532 /* compute other basis functions for non-symmetric problems */ 533 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 534 if (!setsym || (setsym && !issym)) { 535 if (!pcbddc->coarse_psi_B) { 536 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 537 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 539 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 540 } 541 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 542 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 543 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 544 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 545 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 546 } 547 for (i=0;i<pcbddc->local_primal_size;i++) { 548 if (n_constraints) { 549 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 550 for (j=0;j<n_constraints;j++) { 551 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 552 } 553 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 554 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 555 } 556 if (i<n_vertices) { 557 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 558 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 559 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 560 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 561 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 562 if (n_constraints) { 563 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 564 } 565 } else { 566 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 567 } 568 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 569 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 570 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 573 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 574 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 if (i<n_vertices) { 576 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 577 } 578 if (pcbddc->switch_static || pcbddc->dbg_flag) { 579 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 580 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 582 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 583 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 } 585 586 if (pcbddc->dbg_flag) { 587 /* assemble subdomain vector on nodes */ 588 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 589 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 590 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 591 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 if (i<n_vertices) { 593 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 594 } 595 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 596 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 597 /* assemble subdomain vector of lagrange multipliers */ 598 for (j=0;j<pcbddc->local_primal_size;j++) { 599 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 600 } 601 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 602 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 603 /* check saddle point solution */ 604 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 605 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 607 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 608 /* shift by the identity matrix */ 609 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 610 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 611 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 613 } 614 } 615 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 616 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 if (pcbddc->switch_static || pcbddc->dbg_flag) { 618 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 } 621 unsymmetric_check = PETSC_TRUE; 622 } else { /* take references to already computed coarse basis */ 623 unsymmetric_check = PETSC_FALSE; 624 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 625 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 626 if (pcbddc->coarse_phi_D) { 627 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 628 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 629 } 630 } 631 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 632 /* Checking coarse_sub_mat and coarse basis functios */ 633 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 634 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 635 if (pcbddc->dbg_flag) { 636 Mat coarse_sub_mat; 637 Mat AUXMAT,TM1,TM2,TM3,TM4; 638 Mat coarse_phi_D,coarse_phi_B; 639 Mat coarse_psi_D,coarse_psi_B; 640 Mat A_II,A_BB,A_IB,A_BI; 641 MatType checkmattype=MATSEQAIJ; 642 PetscReal real_value; 643 644 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 645 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 646 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 648 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 649 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 650 if (unsymmetric_check) { 651 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 652 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 653 } 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 655 656 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 658 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 659 if (unsymmetric_check) { 660 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 661 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 662 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 663 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 664 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 665 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 666 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 667 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 668 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 669 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 670 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 672 } else { 673 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 674 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 675 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 676 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 677 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 678 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 679 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 680 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 681 } 682 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 683 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 684 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 686 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 687 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 688 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 693 for (i=0;i<pcbddc->local_primal_size;i++) { 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 695 } 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 697 for (i=0;i<pcbddc->local_primal_size;i++) { 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 699 } 700 if (unsymmetric_check) { 701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 702 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 704 } 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 706 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 708 } 709 } 710 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 711 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 712 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 715 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 716 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 719 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 720 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 721 if (unsymmetric_check) { 722 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 723 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 724 } 725 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 726 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 727 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 728 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 729 } 730 /* free memory */ 731 if (n_vertices) { 732 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 733 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 734 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 735 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 737 } 738 if (n_constraints) { 739 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 740 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 741 ierr = MatDestroy(&M1);CHKERRQ(ierr); 742 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 743 } 744 ierr = PetscFree(auxindices);CHKERRQ(ierr); 745 /* get back data */ 746 *coarse_submat_vals_n = coarse_submat_vals; 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 752 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 753 { 754 PC_IS* pcis = (PC_IS*)(pc->data); 755 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 756 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 757 PetscBool issbaij,isseqaij; 758 /* manage repeated solves */ 759 MatReuse reuse; 760 MatStructure matstruct; 761 PetscErrorCode ierr; 762 763 PetscFunctionBegin; 764 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 765 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 766 } 767 /* get mat flags */ 768 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 769 reuse = MAT_INITIAL_MATRIX; 770 if (pc->setupcalled) { 771 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 772 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 773 if (matstruct == SAME_NONZERO_PATTERN) { 774 reuse = MAT_REUSE_MATRIX; 775 } else { 776 reuse = MAT_INITIAL_MATRIX; 777 } 778 } 779 if (reuse == MAT_INITIAL_MATRIX) { 780 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 783 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 784 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 785 } 786 787 /* transform local matrices if needed */ 788 if (pcbddc->use_change_of_basis) { 789 Mat change_mat_all; 790 PetscScalar *row_cmat_values; 791 PetscInt *row_cmat_indices; 792 PetscInt *nnz,*is_indices,*temp_indices; 793 PetscInt i,j,k,n_D,n_B; 794 795 /* Get Non-overlapping dimensions */ 796 n_B = pcis->n_B; 797 n_D = pcis->n-n_B; 798 799 /* compute nonzero structure of change of basis on all local nodes */ 800 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 801 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 803 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 804 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 805 k=1; 806 for (i=0;i<n_B;i++) { 807 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 808 nnz[is_indices[i]]=j; 809 if (k < j) k = j; 810 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 811 } 812 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 813 /* assemble change of basis matrix on the whole set of local dofs */ 814 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 815 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 816 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 817 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 818 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 819 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 820 for (i=0;i<n_D;i++) { 821 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 822 } 823 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 824 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 825 for (i=0;i<n_B;i++) { 826 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 827 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 828 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 829 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 830 } 831 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 832 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 833 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 834 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 835 if (isseqaij) { 836 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 837 } else { 838 Mat work_mat; 839 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 840 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 841 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 842 } 843 /* 844 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 845 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 846 */ 847 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 848 ierr = PetscFree(nnz);CHKERRQ(ierr); 849 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 850 } else { 851 /* without change of basis, the local matrix is unchanged */ 852 if (!pcbddc->local_mat) { 853 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 854 pcbddc->local_mat = matis->A; 855 } 856 } 857 858 /* get submatrices */ 859 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 860 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 861 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 862 if (!issbaij) { 863 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 864 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 865 } else { 866 Mat newmat; 867 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 868 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 869 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 870 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 871 } 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 877 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 878 { 879 PC_IS* pcis = (PC_IS*)(pc->data); 880 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 881 IS is_aux1,is_aux2; 882 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 883 PetscInt n_vertices,i,j,n_R,n_D,n_B; 884 PetscInt vbs,bs; 885 PetscBT bitmask; 886 PetscErrorCode ierr; 887 888 PetscFunctionBegin; 889 /* 890 No need to setup local scatters if 891 - primal space is unchanged 892 AND 893 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 894 AND 895 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 896 */ 897 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 898 PetscFunctionReturn(0); 899 } 900 /* destroy old objects */ 901 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 902 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 903 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 904 /* Set Non-overlapping dimensions */ 905 n_B = pcis->n_B; n_D = pcis->n - n_B; 906 n_vertices = pcbddc->n_actual_vertices; 907 /* create auxiliary bitmask */ 908 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 909 for (i=0;i<n_vertices;i++) { 910 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 911 } 912 913 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 914 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 915 for (i=0, n_R=0; i<pcis->n; i++) { 916 if (!PetscBTLookup(bitmask,i)) { 917 idx_R_local[n_R] = i; 918 n_R++; 919 } 920 } 921 922 /* Block code */ 923 vbs = 1; 924 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 925 if (bs>1 && !(n_vertices%bs)) { 926 PetscBool is_blocked = PETSC_TRUE; 927 PetscInt *vary; 928 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 929 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 930 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 931 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 932 for (i=0; i<n_vertices; i++) { 933 if (vary[i]!=0 && vary[i]!=bs) { 934 is_blocked = PETSC_FALSE; 935 break; 936 } 937 } 938 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 939 vbs = bs; 940 for (i=0;i<n_R/vbs;i++) { 941 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 942 } 943 } 944 ierr = PetscFree(vary);CHKERRQ(ierr); 945 } 946 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 947 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 948 949 /* print some info if requested */ 950 if (pcbddc->dbg_flag) { 951 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 952 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 953 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 958 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 959 } 960 961 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 962 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 963 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 964 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 965 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 966 for (i=0; i<n_D; i++) { 967 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 968 } 969 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 970 for (i=0, j=0; i<n_R; i++) { 971 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 972 aux_array1[j++] = i; 973 } 974 } 975 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 976 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 977 for (i=0, j=0; i<n_B; i++) { 978 if (!PetscBTLookup(bitmask,is_indices[i])) { 979 aux_array2[j++] = i; 980 } 981 } 982 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 983 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 984 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 985 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 986 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 987 988 if (pcbddc->switch_static || pcbddc->dbg_flag) { 989 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 990 for (i=0, j=0; i<n_R; i++) { 991 if (PetscBTLookup(bitmask,idx_R_local[i])) { 992 aux_array1[j++] = i; 993 } 994 } 995 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 996 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 997 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 998 } 999 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1000 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1007 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1008 { 1009 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1010 PC_IS *pcis = (PC_IS*)pc->data; 1011 PC pc_temp; 1012 Mat A_RR; 1013 MatStructure matstruct; 1014 MatReuse reuse; 1015 PetscScalar m_one = -1.0; 1016 PetscReal value; 1017 PetscInt n_D,n_R,ibs,mbs; 1018 PetscBool use_exact,use_exact_reduced,issbaij; 1019 PetscErrorCode ierr; 1020 /* prefixes stuff */ 1021 char dir_prefix[256],neu_prefix[256],str_level[16]; 1022 size_t len; 1023 1024 PetscFunctionBegin; 1025 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1026 1027 /* compute prefixes */ 1028 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1029 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1030 if (!pcbddc->current_level) { 1031 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1032 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1033 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1034 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1035 } else { 1036 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1037 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1038 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1039 len -= 15; /* remove "pc_bddc_coarse_" */ 1040 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1041 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1042 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1043 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1044 *(dir_prefix+len)='\0'; 1045 *(neu_prefix+len)='\0'; 1046 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1047 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1048 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1049 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1050 } 1051 1052 /* DIRICHLET PROBLEM */ 1053 /* Matrix for Dirichlet problem is pcis->A_II */ 1054 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1055 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1056 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1057 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1058 /* default */ 1059 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1060 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1061 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1062 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1063 if (issbaij) { 1064 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1065 } else { 1066 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1067 } 1068 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1069 /* Allow user's customization */ 1070 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1071 } 1072 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1073 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1074 if (!n_D) { 1075 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1076 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1077 } 1078 /* Set Up KSP for Dirichlet problem of BDDC */ 1079 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1080 /* set ksp_D into pcis data */ 1081 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1082 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1083 pcis->ksp_D = pcbddc->ksp_D; 1084 1085 /* NEUMANN PROBLEM */ 1086 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1087 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1088 if (pcbddc->ksp_R) { /* already created ksp */ 1089 PetscInt nn_R; 1090 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1091 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1092 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1093 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1094 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1095 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1096 reuse = MAT_INITIAL_MATRIX; 1097 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1098 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1099 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1100 reuse = MAT_INITIAL_MATRIX; 1101 } else { /* safe to reuse the matrix */ 1102 reuse = MAT_REUSE_MATRIX; 1103 } 1104 } 1105 /* last check */ 1106 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1107 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1108 reuse = MAT_INITIAL_MATRIX; 1109 } 1110 } else { /* first time, so we need to create the matrix */ 1111 reuse = MAT_INITIAL_MATRIX; 1112 } 1113 /* extract A_RR */ 1114 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1115 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1116 if (ibs != mbs) { 1117 Mat newmat; 1118 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1119 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1120 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1121 } else { 1122 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1123 } 1124 if (!pcbddc->ksp_R) { /* create object if not present */ 1125 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1126 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1127 /* default */ 1128 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1129 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1130 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1131 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1132 if (issbaij) { 1133 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1134 } else { 1135 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1136 } 1137 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1138 /* Allow user's customization */ 1139 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1140 } 1141 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1142 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1143 if (!n_R) { 1144 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1145 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1146 } 1147 /* Set Up KSP for Neumann problem of BDDC */ 1148 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1149 1150 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1151 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1152 /* Dirichlet */ 1153 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1154 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1155 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1156 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1157 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1158 /* need to be adapted? */ 1159 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1160 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1161 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1162 /* print info */ 1163 if (pcbddc->dbg_flag) { 1164 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1165 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1166 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1168 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1169 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1170 } 1171 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1172 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1173 } 1174 1175 /* Neumann */ 1176 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1177 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1178 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1179 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1180 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1181 /* need to be adapted? */ 1182 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1183 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1184 /* print info */ 1185 if (pcbddc->dbg_flag) { 1186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1187 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1188 } 1189 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1190 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1191 } 1192 } 1193 /* free Neumann problem's matrix */ 1194 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1200 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1201 { 1202 PetscErrorCode ierr; 1203 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1204 1205 PetscFunctionBegin; 1206 if (applytranspose) { 1207 if (pcbddc->local_auxmat1) { 1208 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1209 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1210 } 1211 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1212 } else { 1213 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1214 if (pcbddc->local_auxmat1) { 1215 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1216 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1217 } 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1225 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1226 { 1227 PetscErrorCode ierr; 1228 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1229 PC_IS* pcis = (PC_IS*) (pc->data); 1230 const PetscScalar zero = 0.0; 1231 1232 PetscFunctionBegin; 1233 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1234 if (applytranspose) { 1235 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1236 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1237 } else { 1238 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1239 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1240 } 1241 /* start communications from local primal nodes to rhs of coarse solver */ 1242 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1243 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 1246 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1247 /* TODO remove null space when doing multilevel */ 1248 if (pcbddc->coarse_ksp) { 1249 if (applytranspose) { 1250 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1251 } else { 1252 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1253 } 1254 } 1255 /* start communications from coarse solver solution to local primal nodes */ 1256 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1257 1258 /* Local solution on R nodes */ 1259 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1260 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1261 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1262 if (pcbddc->switch_static) { 1263 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1264 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1265 } 1266 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1267 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1268 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 if (pcbddc->switch_static) { 1271 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1272 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1273 } 1274 1275 /* complete communications from coarse sol to local primal nodes */ 1276 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1277 1278 /* Sum contributions from two levels */ 1279 if (applytranspose) { 1280 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1281 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1282 } else { 1283 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1284 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1285 } 1286 PetscFunctionReturn(0); 1287 } 1288 1289 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1292 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1293 { 1294 PetscErrorCode ierr; 1295 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1296 PetscScalar *array,*array2; 1297 Vec from,to; 1298 1299 PetscFunctionBegin; 1300 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1301 from = pcbddc->coarse_vec; 1302 to = pcbddc->vec1_P; 1303 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1304 Vec tvec; 1305 PetscInt lsize; 1306 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1307 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1308 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1309 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1310 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1311 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1312 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1313 } 1314 } else { /* from local to global -> put data in coarse right hand side */ 1315 from = pcbddc->vec1_P; 1316 to = pcbddc->coarse_vec; 1317 } 1318 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1324 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1325 { 1326 PetscErrorCode ierr; 1327 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1328 PetscScalar *array,*array2; 1329 Vec from,to; 1330 1331 PetscFunctionBegin; 1332 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1333 from = pcbddc->coarse_vec; 1334 to = pcbddc->vec1_P; 1335 } else { /* from local to global -> put data in coarse right hand side */ 1336 from = pcbddc->vec1_P; 1337 to = pcbddc->coarse_vec; 1338 } 1339 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1340 if (smode == SCATTER_FORWARD) { 1341 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1342 Vec tvec; 1343 PetscInt lsize; 1344 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1345 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1346 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1347 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1348 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1349 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1350 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1351 } 1352 } 1353 PetscFunctionReturn(0); 1354 } 1355 1356 /* uncomment for testing purposes */ 1357 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1360 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1361 { 1362 PetscErrorCode ierr; 1363 PC_IS* pcis = (PC_IS*)(pc->data); 1364 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1365 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1366 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1367 MatType impMatType=MATSEQAIJ; 1368 /* one and zero */ 1369 PetscScalar one=1.0,zero=0.0; 1370 /* space to store constraints and their local indices */ 1371 PetscScalar *temp_quadrature_constraint; 1372 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1373 /* iterators */ 1374 PetscInt i,j,k,total_counts,temp_start_ptr; 1375 /* stuff to store connected components stored in pcbddc->mat_graph */ 1376 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1377 PetscInt n_ISForFaces,n_ISForEdges; 1378 /* near null space stuff */ 1379 MatNullSpace nearnullsp; 1380 const Vec *nearnullvecs; 1381 Vec *localnearnullsp; 1382 PetscBool nnsp_has_cnst; 1383 PetscInt nnsp_size; 1384 PetscScalar *array; 1385 /* BLAS integers */ 1386 PetscBLASInt lwork,lierr; 1387 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1388 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1389 /* LAPACK working arrays for SVD or POD */ 1390 PetscBool skip_lapack; 1391 PetscScalar *work; 1392 PetscReal *singular_vals; 1393 #if defined(PETSC_USE_COMPLEX) 1394 PetscReal *rwork; 1395 #endif 1396 #if defined(PETSC_MISSING_LAPACK_GESVD) 1397 PetscBLASInt Blas_one_2=1; 1398 PetscScalar *temp_basis,*correlation_mat; 1399 #else 1400 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1401 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1402 #endif 1403 /* reuse */ 1404 PetscInt olocal_primal_size; 1405 PetscInt *oprimal_indices_local_idxs; 1406 /* change of basis */ 1407 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1408 PetscBool boolforchange,qr_needed; 1409 PetscBT touched,change_basis,qr_needed_idx; 1410 /* auxiliary stuff */ 1411 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1412 /* some quantities */ 1413 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1414 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1415 1416 1417 PetscFunctionBegin; 1418 /* Destroy Mat objects computed previously */ 1419 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1420 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1421 /* Get index sets for faces, edges and vertices from graph */ 1422 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1423 pcbddc->use_vertices = PETSC_TRUE; 1424 } 1425 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1426 /* HACK: provide functions to set change of basis */ 1427 if (!ISForVertices && pcbddc->NullSpace) { 1428 pcbddc->use_change_of_basis = PETSC_TRUE; 1429 pcbddc->use_change_on_faces = PETSC_FALSE; 1430 } 1431 /* print some info */ 1432 if (pcbddc->dbg_flag) { 1433 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1434 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1435 i = 0; 1436 if (ISForVertices) { 1437 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1438 } 1439 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1440 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1441 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1442 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1443 } 1444 /* check if near null space is attached to global mat */ 1445 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1446 if (nearnullsp) { 1447 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1448 /* remove any stored info */ 1449 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1450 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1451 /* store information for BDDC solver reuse */ 1452 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1453 pcbddc->onearnullspace = nearnullsp; 1454 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1455 for (i=0;i<nnsp_size;i++) { 1456 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1457 } 1458 } else { /* if near null space is not provided BDDC uses constants by default */ 1459 nnsp_size = 0; 1460 nnsp_has_cnst = PETSC_TRUE; 1461 } 1462 /* get max number of constraints on a single cc */ 1463 max_constraints = nnsp_size; 1464 if (nnsp_has_cnst) max_constraints++; 1465 1466 /* 1467 Evaluate maximum storage size needed by the procedure 1468 - temp_indices will contain start index of each constraint stored as follows 1469 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1470 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1471 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1472 */ 1473 total_counts = n_ISForFaces+n_ISForEdges; 1474 total_counts *= max_constraints; 1475 n_vertices = 0; 1476 if (ISForVertices) { 1477 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1478 } 1479 total_counts += n_vertices; 1480 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1481 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1482 total_counts = 0; 1483 max_size_of_constraint = 0; 1484 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1485 if (i<n_ISForEdges) { 1486 used_IS = &ISForEdges[i]; 1487 } else { 1488 used_IS = &ISForFaces[i-n_ISForEdges]; 1489 } 1490 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1491 total_counts += j; 1492 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1493 } 1494 total_counts *= max_constraints; 1495 total_counts += n_vertices; 1496 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1497 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1498 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1499 /* get local part of global near null space vectors */ 1500 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1501 for (k=0;k<nnsp_size;k++) { 1502 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1503 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1504 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 } 1506 1507 /* whether or not to skip lapack calls */ 1508 skip_lapack = PETSC_TRUE; 1509 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1510 1511 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1512 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1513 PetscScalar temp_work; 1514 #if defined(PETSC_MISSING_LAPACK_GESVD) 1515 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1516 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1517 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1518 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1519 #if defined(PETSC_USE_COMPLEX) 1520 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1521 #endif 1522 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1523 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1524 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1525 lwork = -1; 1526 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1527 #if !defined(PETSC_USE_COMPLEX) 1528 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1529 #else 1530 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1531 #endif 1532 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1533 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1534 #else /* on missing GESVD */ 1535 /* SVD */ 1536 PetscInt max_n,min_n; 1537 max_n = max_size_of_constraint; 1538 min_n = max_constraints; 1539 if (max_size_of_constraint < max_constraints) { 1540 min_n = max_size_of_constraint; 1541 max_n = max_constraints; 1542 } 1543 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1544 #if defined(PETSC_USE_COMPLEX) 1545 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1546 #endif 1547 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1548 lwork = -1; 1549 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1550 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1551 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1552 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1553 #if !defined(PETSC_USE_COMPLEX) 1554 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1555 #else 1556 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1557 #endif 1558 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1559 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1560 #endif /* on missing GESVD */ 1561 /* Allocate optimal workspace */ 1562 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1563 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1564 } 1565 /* Now we can loop on constraining sets */ 1566 total_counts = 0; 1567 temp_indices[0] = 0; 1568 /* vertices */ 1569 if (ISForVertices) { 1570 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1571 if (nnsp_has_cnst) { /* consider all vertices */ 1572 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1573 for (i=0;i<n_vertices;i++) { 1574 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1575 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1576 total_counts++; 1577 } 1578 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1579 PetscBool used_vertex; 1580 for (i=0;i<n_vertices;i++) { 1581 used_vertex = PETSC_FALSE; 1582 k = 0; 1583 while (!used_vertex && k<nnsp_size) { 1584 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1585 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1586 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1587 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1588 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1589 total_counts++; 1590 used_vertex = PETSC_TRUE; 1591 } 1592 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1593 k++; 1594 } 1595 } 1596 } 1597 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1598 n_vertices = total_counts; 1599 } 1600 1601 /* edges and faces */ 1602 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1603 if (i<n_ISForEdges) { 1604 used_IS = &ISForEdges[i]; 1605 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1606 } else { 1607 used_IS = &ISForFaces[i-n_ISForEdges]; 1608 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1609 } 1610 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1611 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1612 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1613 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1614 /* change of basis should not be performed on local periodic nodes */ 1615 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1616 if (nnsp_has_cnst) { 1617 PetscScalar quad_value; 1618 temp_constraints++; 1619 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1620 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1621 for (j=0;j<size_of_constraint;j++) { 1622 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1623 } 1624 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1625 total_counts++; 1626 } 1627 for (k=0;k<nnsp_size;k++) { 1628 PetscReal real_value; 1629 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1630 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1631 for (j=0;j<size_of_constraint;j++) { 1632 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1633 } 1634 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1635 /* check if array is null on the connected component */ 1636 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1637 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1638 if (real_value > 0.0) { /* keep indices and values */ 1639 temp_constraints++; 1640 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1641 total_counts++; 1642 } 1643 } 1644 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1645 valid_constraints = temp_constraints; 1646 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1647 if (!pcbddc->use_nnsp_true && temp_constraints) { 1648 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1649 1650 #if defined(PETSC_MISSING_LAPACK_GESVD) 1651 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1652 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1653 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1654 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1655 from that computed using LAPACKgesvd 1656 -> This is due to a different computation of eigenvectors in LAPACKheev 1657 -> The quality of the POD-computed basis will be the same */ 1658 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1659 /* Store upper triangular part of correlation matrix */ 1660 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1661 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1662 for (j=0;j<temp_constraints;j++) { 1663 for (k=0;k<j+1;k++) { 1664 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1665 } 1666 } 1667 /* compute eigenvalues and eigenvectors of correlation matrix */ 1668 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1669 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1670 #if !defined(PETSC_USE_COMPLEX) 1671 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1672 #else 1673 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1674 #endif 1675 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1676 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1677 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1678 j=0; 1679 while (j < temp_constraints && singular_vals[j] < tol) j++; 1680 total_counts=total_counts-j; 1681 valid_constraints = temp_constraints-j; 1682 /* scale and copy POD basis into used quadrature memory */ 1683 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1684 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1685 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1686 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1687 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1688 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1689 if (j<temp_constraints) { 1690 PetscInt ii; 1691 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1692 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1693 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1694 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1695 for (k=0;k<temp_constraints-j;k++) { 1696 for (ii=0;ii<size_of_constraint;ii++) { 1697 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1698 } 1699 } 1700 } 1701 #else /* on missing GESVD */ 1702 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1703 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1704 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1705 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1706 #if !defined(PETSC_USE_COMPLEX) 1707 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1708 #else 1709 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1710 #endif 1711 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1712 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1713 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1714 k = temp_constraints; 1715 if (k > size_of_constraint) k = size_of_constraint; 1716 j = 0; 1717 while (j < k && singular_vals[k-j-1] < tol) j++; 1718 total_counts = total_counts-temp_constraints+k-j; 1719 valid_constraints = k-j; 1720 #endif /* on missing GESVD */ 1721 } 1722 /* setting change_of_basis flag is safe now */ 1723 if (boolforchange) { 1724 for (j=0;j<valid_constraints;j++) { 1725 PetscBTSet(change_basis,total_counts-j-1); 1726 } 1727 } 1728 } 1729 /* free index sets of faces, edges and vertices */ 1730 for (i=0;i<n_ISForFaces;i++) { 1731 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1732 } 1733 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1734 for (i=0;i<n_ISForEdges;i++) { 1735 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1736 } 1737 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1738 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1739 /* map temp_indices_to_constraint in boundary numbering */ 1740 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1741 if (i != temp_indices[total_counts]) { 1742 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1743 } 1744 1745 /* free workspace */ 1746 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1747 ierr = PetscFree(work);CHKERRQ(ierr); 1748 #if defined(PETSC_USE_COMPLEX) 1749 ierr = PetscFree(rwork);CHKERRQ(ierr); 1750 #endif 1751 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1752 #if defined(PETSC_MISSING_LAPACK_GESVD) 1753 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1754 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1755 #endif 1756 } 1757 for (k=0;k<nnsp_size;k++) { 1758 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1759 } 1760 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1761 1762 /* set quantities in pcbddc data structure and store previous primal size */ 1763 /* n_vertices defines the number of subdomain corners in the primal space */ 1764 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1765 olocal_primal_size = pcbddc->local_primal_size; 1766 pcbddc->local_primal_size = total_counts; 1767 pcbddc->n_vertices = n_vertices; 1768 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1769 1770 /* Create constraint matrix */ 1771 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1772 /* so we need to set it up properly either with or without change of basis */ 1773 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1774 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1775 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1776 /* array to compute a local numbering of constraints : vertices first then constraints */ 1777 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1778 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1779 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1780 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1781 /* auxiliary stuff for basis change */ 1782 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1783 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1784 1785 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1786 total_primal_vertices=0; 1787 for (i=0;i<pcbddc->local_primal_size;i++) { 1788 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1789 if (size_of_constraint == 1) { 1790 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1791 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1792 aux_primal_minloc[total_primal_vertices]=0; 1793 total_primal_vertices++; 1794 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1795 PetscInt min_loc,min_index; 1796 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1797 /* find first untouched local node */ 1798 k = 0; 1799 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1800 min_index = global_indices[k]; 1801 min_loc = k; 1802 /* search the minimum among global nodes already untouched on the cc */ 1803 for (k=1;k<size_of_constraint;k++) { 1804 /* there can be more than one constraint on a single connected component */ 1805 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1806 min_index = global_indices[k]; 1807 min_loc = k; 1808 } 1809 } 1810 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1811 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1812 aux_primal_minloc[total_primal_vertices]=min_loc; 1813 total_primal_vertices++; 1814 } 1815 } 1816 /* determine if a QR strategy is needed for change of basis */ 1817 qr_needed = PETSC_FALSE; 1818 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1819 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1820 if (PetscBTLookup(change_basis,i)) { 1821 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1822 j = 0; 1823 for (k=0;k<size_of_constraint;k++) { 1824 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1825 j++; 1826 } 1827 } 1828 /* found more than one primal dof on the cc */ 1829 if (j > 1) { 1830 PetscBTSet(qr_needed_idx,i); 1831 qr_needed = PETSC_TRUE; 1832 } 1833 } 1834 } 1835 /* free workspace */ 1836 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1837 1838 /* permute indices in order to have a sorted set of vertices */ 1839 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1840 1841 /* nonzero structure of constraint matrix */ 1842 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1843 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1844 j=total_primal_vertices; 1845 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1846 if (!PetscBTLookup(change_basis,i)) { 1847 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1848 j++; 1849 } 1850 } 1851 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1852 ierr = PetscFree(nnz);CHKERRQ(ierr); 1853 /* set values in constraint matrix */ 1854 for (i=0;i<total_primal_vertices;i++) { 1855 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1856 } 1857 total_counts = total_primal_vertices; 1858 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1859 if (!PetscBTLookup(change_basis,i)) { 1860 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1861 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1862 total_counts++; 1863 } 1864 } 1865 /* assembling */ 1866 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1867 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1868 /* 1869 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1870 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1871 */ 1872 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1873 if (pcbddc->use_change_of_basis) { 1874 /* dual and primal dofs on a single cc */ 1875 PetscInt dual_dofs,primal_dofs; 1876 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1877 PetscInt primal_counter; 1878 /* working stuff for GEQRF */ 1879 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1880 PetscBLASInt lqr_work; 1881 /* working stuff for UNGQR */ 1882 PetscScalar *gqr_work,lgqr_work_t; 1883 PetscBLASInt lgqr_work; 1884 /* working stuff for TRTRS */ 1885 PetscScalar *trs_rhs; 1886 PetscBLASInt Blas_NRHS; 1887 /* pointers for values insertion into change of basis matrix */ 1888 PetscInt *start_rows,*start_cols; 1889 PetscScalar *start_vals; 1890 /* working stuff for values insertion */ 1891 PetscBT is_primal; 1892 1893 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1894 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1895 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1896 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1897 /* work arrays */ 1898 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1899 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1900 /* nonzeros per row */ 1901 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1902 if (PetscBTLookup(change_basis,i)) { 1903 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1904 if (PetscBTLookup(qr_needed_idx,i)) { 1905 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1906 } else { 1907 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1908 /* get local primal index on the cc */ 1909 j = 0; 1910 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1911 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1912 } 1913 } 1914 } 1915 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1916 ierr = PetscFree(nnz);CHKERRQ(ierr); 1917 /* Set initial identity in the matrix */ 1918 for (i=0;i<pcis->n_B;i++) { 1919 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1920 } 1921 1922 if (pcbddc->dbg_flag) { 1923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1924 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1925 } 1926 1927 1928 /* Now we loop on the constraints which need a change of basis */ 1929 /* 1930 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1931 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1932 1933 Basic blocks of change of basis matrix T computed 1934 1935 - Using the following block transformation if there is only a primal dof on the cc 1936 (in the example, primal dof is the last one of the edge in LOCAL ordering 1937 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1938 | 1 0 ... 0 1 | 1939 | 0 1 ... 0 1 | 1940 | ... | 1941 | 0 ... 1 1 | 1942 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1943 1944 - via QR decomposition of constraints otherwise 1945 */ 1946 if (qr_needed) { 1947 /* space to store Q */ 1948 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1949 /* first we issue queries for optimal work */ 1950 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1951 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1952 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1953 lqr_work = -1; 1954 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1955 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1956 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1957 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1958 lgqr_work = -1; 1959 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1960 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1962 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1963 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1964 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1965 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1966 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1967 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1968 /* array to store scaling factors for reflectors */ 1969 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1970 /* array to store rhs and solution of triangular solver */ 1971 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1972 /* allocating workspace for check */ 1973 if (pcbddc->dbg_flag) { 1974 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1975 } 1976 } 1977 /* array to store whether a node is primal or not */ 1978 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1979 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1980 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1981 if (i != total_primal_vertices) { 1982 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1983 } 1984 for (i=0;i<total_primal_vertices;i++) { 1985 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1986 } 1987 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1988 1989 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1990 /* -> using implicit ordering contained in temp_indices data */ 1991 total_counts = pcbddc->n_vertices; 1992 primal_counter = total_counts; 1993 while (total_counts<pcbddc->local_primal_size) { 1994 primal_dofs = 1; 1995 if (PetscBTLookup(change_basis,total_counts)) { 1996 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1997 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1998 primal_dofs++; 1999 } 2000 /* get constraint info */ 2001 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2002 dual_dofs = size_of_constraint-primal_dofs; 2003 2004 if (pcbddc->dbg_flag) { 2005 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2006 } 2007 2008 if (primal_dofs > 1) { /* QR */ 2009 2010 /* copy quadrature constraints for change of basis check */ 2011 if (pcbddc->dbg_flag) { 2012 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2013 } 2014 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2015 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2016 2017 /* compute QR decomposition of constraints */ 2018 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2019 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2020 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2021 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2022 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2023 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2024 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2025 2026 /* explictly compute R^-T */ 2027 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2028 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2029 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2030 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2031 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2032 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2033 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2034 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2035 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2036 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2037 2038 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2039 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2040 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2041 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2042 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2043 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2044 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2045 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2046 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2047 2048 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2049 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2050 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2051 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2052 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2053 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2054 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2055 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2056 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2057 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2058 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2059 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2060 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2061 2062 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2063 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2064 /* insert cols for primal dofs */ 2065 for (j=0;j<primal_dofs;j++) { 2066 start_vals = &qr_basis[j*size_of_constraint]; 2067 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2068 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2069 } 2070 /* insert cols for dual dofs */ 2071 for (j=0,k=0;j<dual_dofs;k++) { 2072 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2073 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2074 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2075 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2076 j++; 2077 } 2078 } 2079 2080 /* check change of basis */ 2081 if (pcbddc->dbg_flag) { 2082 PetscInt ii,jj; 2083 PetscBool valid_qr=PETSC_TRUE; 2084 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2085 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2089 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2090 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2091 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2092 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2093 for (jj=0;jj<size_of_constraint;jj++) { 2094 for (ii=0;ii<primal_dofs;ii++) { 2095 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2096 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2097 } 2098 } 2099 if (!valid_qr) { 2100 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2101 for (jj=0;jj<size_of_constraint;jj++) { 2102 for (ii=0;ii<primal_dofs;ii++) { 2103 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2104 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2105 } 2106 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2107 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2108 } 2109 } 2110 } 2111 } else { 2112 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2113 } 2114 } 2115 } else { /* simple transformation block */ 2116 PetscInt row,col; 2117 PetscScalar val; 2118 for (j=0;j<size_of_constraint;j++) { 2119 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2120 if (!PetscBTLookup(is_primal,row)) { 2121 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2122 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2123 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2124 } else { 2125 for (k=0;k<size_of_constraint;k++) { 2126 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2127 if (row != col) { 2128 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2129 } else { 2130 val = 1.0; 2131 } 2132 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2133 } 2134 } 2135 } 2136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2137 } 2138 /* increment primal counter */ 2139 primal_counter += primal_dofs; 2140 } else { 2141 if (pcbddc->dbg_flag) { 2142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2143 } 2144 } 2145 /* increment constraint counter total_counts */ 2146 total_counts += primal_dofs; 2147 } 2148 2149 /* free workspace */ 2150 if (qr_needed) { 2151 if (pcbddc->dbg_flag) { 2152 ierr = PetscFree(work);CHKERRQ(ierr); 2153 } 2154 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2155 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2156 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2157 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2158 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2159 } 2160 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2161 /* assembling */ 2162 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2163 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2164 /* 2165 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2166 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2167 */ 2168 } 2169 2170 /* get indices in local ordering for vertices and constraints */ 2171 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2172 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2173 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2174 } 2175 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2176 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2177 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2178 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2179 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2180 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2181 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2182 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2183 /* set quantities in PCBDDC data struct */ 2184 pcbddc->n_actual_vertices = i; 2185 /* check if a new primal space has been introduced */ 2186 pcbddc->new_primal_space_local = PETSC_TRUE; 2187 if (olocal_primal_size == pcbddc->local_primal_size) { 2188 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2189 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2190 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2191 } 2192 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2193 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2194 2195 /* flush dbg viewer */ 2196 if (pcbddc->dbg_flag) { 2197 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2198 } 2199 2200 /* free workspace */ 2201 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2202 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2203 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2204 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2205 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2206 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2207 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2208 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2214 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2215 { 2216 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2217 PC_IS *pcis = (PC_IS*)pc->data; 2218 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2219 PetscInt ierr,i,vertex_size; 2220 PetscViewer viewer=pcbddc->dbg_viewer; 2221 2222 PetscFunctionBegin; 2223 /* Reset previously computed graph */ 2224 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2225 /* Init local Graph struct */ 2226 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2227 2228 /* Check validity of the csr graph passed in by the user */ 2229 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2230 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2231 } 2232 2233 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2234 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2235 Mat mat_adj; 2236 const PetscInt *xadj,*adjncy; 2237 PetscBool flg_row=PETSC_TRUE; 2238 2239 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2240 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2241 if (!flg_row) { 2242 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2243 } 2244 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2245 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2246 if (!flg_row) { 2247 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2248 } 2249 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2250 } 2251 2252 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2253 vertex_size = 1; 2254 if (pcbddc->user_provided_isfordofs) { 2255 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2256 ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2257 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2258 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2259 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2260 } 2261 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2262 pcbddc->n_ISForDofs = 0; 2263 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2264 } 2265 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2266 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2267 } else { 2268 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2269 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2270 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2271 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2272 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2273 } 2274 } 2275 } 2276 2277 /* Setup of Graph */ 2278 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2279 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2280 } 2281 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2282 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2283 } 2284 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2285 2286 /* Graph's connected components analysis */ 2287 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2288 2289 /* print some info to stdout */ 2290 if (pcbddc->dbg_flag) { 2291 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2292 } 2293 2294 /* mark topography has done */ 2295 pcbddc->recompute_topography = PETSC_FALSE; 2296 PetscFunctionReturn(0); 2297 } 2298 2299 #undef __FUNCT__ 2300 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2301 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2302 { 2303 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2304 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2305 PetscErrorCode ierr; 2306 2307 PetscFunctionBegin; 2308 n = 0; 2309 vertices = 0; 2310 if (pcbddc->ConstraintMatrix) { 2311 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2312 for (i=0;i<local_primal_size;i++) { 2313 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2314 if (size_of_constraint == 1) n++; 2315 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2316 } 2317 if (vertices_idx) { 2318 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2319 n = 0; 2320 for (i=0;i<local_primal_size;i++) { 2321 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2322 if (size_of_constraint == 1) { 2323 vertices[n++]=row_cmat_indices[0]; 2324 } 2325 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2326 } 2327 } 2328 } 2329 *n_vertices = n; 2330 if (vertices_idx) *vertices_idx = vertices; 2331 PetscFunctionReturn(0); 2332 } 2333 2334 #undef __FUNCT__ 2335 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2336 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2337 { 2338 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2339 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2340 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2341 PetscBT touched; 2342 PetscErrorCode ierr; 2343 2344 /* This function assumes that the number of local constraints per connected component 2345 is not greater than the number of nodes defined for the connected component 2346 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2347 PetscFunctionBegin; 2348 n = 0; 2349 constraints_index = 0; 2350 if (pcbddc->ConstraintMatrix) { 2351 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2352 max_size_of_constraint = 0; 2353 for (i=0;i<local_primal_size;i++) { 2354 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2355 if (size_of_constraint > 1) { 2356 n++; 2357 } 2358 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2359 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2360 } 2361 if (constraints_idx) { 2362 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2363 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2364 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2365 n = 0; 2366 for (i=0;i<local_primal_size;i++) { 2367 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2368 if (size_of_constraint > 1) { 2369 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2370 /* find first untouched local node */ 2371 j = 0; 2372 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2373 min_index = row_cmat_global_indices[j]; 2374 min_loc = j; 2375 /* search the minimum among nodes not yet touched on the connected component 2376 since there can be more than one constraint on a single cc */ 2377 for (j=1;j<size_of_constraint;j++) { 2378 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2379 min_index = row_cmat_global_indices[j]; 2380 min_loc = j; 2381 } 2382 } 2383 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2384 constraints_index[n++] = row_cmat_indices[min_loc]; 2385 } 2386 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2387 } 2388 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2389 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2390 } 2391 } 2392 *n_constraints = n; 2393 if (constraints_idx) *constraints_idx = constraints_index; 2394 PetscFunctionReturn(0); 2395 } 2396 2397 /* the next two functions has been adapted from pcis.c */ 2398 #undef __FUNCT__ 2399 #define __FUNCT__ "PCBDDCApplySchur" 2400 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2401 { 2402 PetscErrorCode ierr; 2403 PC_IS *pcis = (PC_IS*)(pc->data); 2404 2405 PetscFunctionBegin; 2406 if (!vec2_B) { vec2_B = v; } 2407 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2408 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2409 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2410 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2411 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2417 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2418 { 2419 PetscErrorCode ierr; 2420 PC_IS *pcis = (PC_IS*)(pc->data); 2421 2422 PetscFunctionBegin; 2423 if (!vec2_B) { vec2_B = v; } 2424 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2425 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2426 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2427 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2428 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2429 PetscFunctionReturn(0); 2430 } 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "PCBDDCSubsetNumbering" 2434 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2435 { 2436 Vec local_vec,global_vec; 2437 IS seqis,paris; 2438 VecScatter scatter_ctx; 2439 PetscScalar *array; 2440 PetscInt *temp_global_dofs; 2441 PetscScalar globalsum; 2442 PetscInt i,j,s; 2443 PetscInt nlocals,first_index,old_index,max_local; 2444 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2445 PetscMPIInt *dof_sizes,*dof_displs; 2446 PetscBool first_found; 2447 PetscErrorCode ierr; 2448 2449 PetscFunctionBegin; 2450 /* mpi buffers */ 2451 MPI_Comm_size(comm,&size_prec_comm); 2452 MPI_Comm_rank(comm,&rank_prec_comm); 2453 j = ( !rank_prec_comm ? size_prec_comm : 0); 2454 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2455 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2456 /* get maximum size of subset */ 2457 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2458 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2459 max_local = 0; 2460 if (n_local_dofs) { 2461 max_local = temp_global_dofs[0]; 2462 for (i=1;i<n_local_dofs;i++) { 2463 if (max_local < temp_global_dofs[i] ) { 2464 max_local = temp_global_dofs[i]; 2465 } 2466 } 2467 } 2468 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2469 max_global++; 2470 max_local = 0; 2471 if (n_local_dofs) { 2472 max_local = local_dofs[0]; 2473 for (i=1;i<n_local_dofs;i++) { 2474 if (max_local < local_dofs[i] ) { 2475 max_local = local_dofs[i]; 2476 } 2477 } 2478 } 2479 max_local++; 2480 /* allocate workspace */ 2481 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2482 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2483 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2484 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2485 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2486 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2487 /* create scatter */ 2488 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2489 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2490 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2491 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2492 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2493 /* init array */ 2494 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2495 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2496 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2497 if (local_dofs_mult) { 2498 for (i=0;i<n_local_dofs;i++) { 2499 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2500 } 2501 } else { 2502 for (i=0;i<n_local_dofs;i++) { 2503 array[local_dofs[i]]=1.0; 2504 } 2505 } 2506 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2507 /* scatter into global vec and get total number of global dofs */ 2508 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2509 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2510 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2511 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2512 /* Fill global_vec with cumulative function for global numbering */ 2513 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2514 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2515 nlocals = 0; 2516 first_index = -1; 2517 first_found = PETSC_FALSE; 2518 for (i=0;i<s;i++) { 2519 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2520 first_found = PETSC_TRUE; 2521 first_index = i; 2522 } 2523 nlocals += (PetscInt)PetscRealPart(array[i]); 2524 } 2525 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2526 if (!rank_prec_comm) { 2527 dof_displs[0]=0; 2528 for (i=1;i<size_prec_comm;i++) { 2529 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2530 } 2531 } 2532 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2533 if (first_found) { 2534 array[first_index] += (PetscScalar)nlocals; 2535 old_index = first_index; 2536 for (i=first_index+1;i<s;i++) { 2537 if (PetscRealPart(array[i]) > 0.0) { 2538 array[i] += array[old_index]; 2539 old_index = i; 2540 } 2541 } 2542 } 2543 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2544 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2545 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2546 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2547 /* get global ordering of local dofs */ 2548 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2549 if (local_dofs_mult) { 2550 for (i=0;i<n_local_dofs;i++) { 2551 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2552 } 2553 } else { 2554 for (i=0;i<n_local_dofs;i++) { 2555 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2556 } 2557 } 2558 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2559 /* free workspace */ 2560 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2561 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2562 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2563 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2564 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2565 /* return pointer to global ordering of local dofs */ 2566 *global_numbering_subset = temp_global_dofs; 2567 PetscFunctionReturn(0); 2568 } 2569 2570 #undef __FUNCT__ 2571 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2572 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2573 { 2574 PetscInt i,j; 2575 PetscScalar *alphas; 2576 PetscErrorCode ierr; 2577 2578 PetscFunctionBegin; 2579 /* this implements stabilized Gram-Schmidt */ 2580 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2581 for (i=0;i<n;i++) { 2582 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2583 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2584 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2585 } 2586 ierr = PetscFree(alphas);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 /* TODO 2591 - now preallocation is done assuming SEQDENSE local matrices 2592 */ 2593 #undef __FUNCT__ 2594 #define __FUNCT__ "MatISGetMPIXAIJ" 2595 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2596 { 2597 Mat new_mat; 2598 Mat_IS *matis = (Mat_IS*)(mat->data); 2599 /* info on mat */ 2600 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2601 PetscInt bs,rows,cols; 2602 PetscInt lrows,lcols; 2603 PetscInt local_rows,local_cols; 2604 PetscBool isdense; 2605 /* values insertion */ 2606 PetscScalar *array; 2607 PetscInt *local_indices,*global_indices; 2608 /* work */ 2609 PetscInt i,j,index_row; 2610 PetscErrorCode ierr; 2611 2612 PetscFunctionBegin; 2613 /* MISSING CHECKS 2614 - rectangular case not covered (it is not allowed by MATIS) 2615 */ 2616 /* get info from mat */ 2617 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2618 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2619 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2620 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2621 2622 /* work */ 2623 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2624 for (i=0;i<local_rows;i++) local_indices[i]=i; 2625 /* map indices of local mat to global values */ 2626 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2627 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2628 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2629 2630 if (reuse==MAT_INITIAL_MATRIX) { 2631 Vec vec_dnz,vec_onz; 2632 PetscScalar *my_dnz,*my_onz; 2633 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2634 PetscInt index_col,owner; 2635 PetscMPIInt nsubdomains; 2636 2637 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2638 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2639 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2640 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2641 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2642 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2643 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2644 2645 /* 2646 preallocation 2647 */ 2648 2649 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2650 /* 2651 Some vectors are needed to sum up properly on shared interface dofs. 2652 Preallocation macros cannot do the job. 2653 Note that preallocation is not exact, since it overestimates nonzeros 2654 */ 2655 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2656 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2657 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2658 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2659 /* All processes need to compute entire row ownership */ 2660 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2661 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2662 for (i=0;i<nsubdomains;i++) { 2663 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2664 row_ownership[j]=i; 2665 } 2666 } 2667 2668 /* 2669 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2670 then, they will be summed up properly. This way, preallocation is always sufficient 2671 */ 2672 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2673 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2674 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2675 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2676 for (i=0;i<local_rows;i++) { 2677 index_row = global_indices[i]; 2678 for (j=i;j<local_rows;j++) { 2679 owner = row_ownership[index_row]; 2680 index_col = global_indices[j]; 2681 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2682 my_dnz[i] += 1.0; 2683 } else { /* offdiag block */ 2684 my_onz[i] += 1.0; 2685 } 2686 /* same as before, interchanging rows and cols */ 2687 if (i != j) { 2688 owner = row_ownership[index_col]; 2689 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2690 my_dnz[j] += 1.0; 2691 } else { 2692 my_onz[j] += 1.0; 2693 } 2694 } 2695 } 2696 } 2697 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2698 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2699 if (local_rows) { /* multilevel guard */ 2700 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2701 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2702 } 2703 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2704 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2705 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2706 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2707 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2708 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2709 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2710 2711 /* set computed preallocation in dnz and onz */ 2712 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2713 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2714 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2715 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2716 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2717 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2718 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2719 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2720 2721 /* Resize preallocation if overestimated */ 2722 for (i=0;i<lrows;i++) { 2723 dnz[i] = PetscMin(dnz[i],lcols); 2724 onz[i] = PetscMin(onz[i],cols-lcols); 2725 } 2726 /* set preallocation */ 2727 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2728 for (i=0;i<lrows/bs;i++) { 2729 dnz[i] = dnz[i*bs]/bs; 2730 onz[i] = onz[i*bs]/bs; 2731 } 2732 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2733 for (i=0;i<lrows/bs;i++) { 2734 dnz[i] = dnz[i]-i; 2735 } 2736 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2737 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2738 *M = new_mat; 2739 } else { 2740 PetscInt mbs,mrows,mcols; 2741 /* some checks */ 2742 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2743 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2744 if (mrows != rows) { 2745 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2746 } 2747 if (mrows != rows) { 2748 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2749 } 2750 if (mbs != bs) { 2751 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2752 } 2753 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2754 } 2755 /* set local to global mappings */ 2756 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2757 /* Set values */ 2758 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2759 if (isdense) { /* special case for dense local matrices */ 2760 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2761 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2762 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2763 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2764 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2765 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2766 } else { /* very basic values insertion for all other matrix types */ 2767 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2768 for (i=0;i<local_rows;i++) { 2769 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2770 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2771 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2772 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2773 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2774 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2775 } 2776 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2777 } 2778 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2779 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2780 if (isdense) { 2781 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2782 } 2783 PetscFunctionReturn(0); 2784 } 2785 2786 #undef __FUNCT__ 2787 #define __FUNCT__ "MatISGetSubassemblingPattern" 2788 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2789 { 2790 Mat subdomain_adj; 2791 IS new_ranks,ranks_send_to; 2792 MatPartitioning partitioner; 2793 Mat_IS *matis; 2794 PetscInt n_neighs,*neighs,*n_shared,**shared; 2795 PetscInt prank; 2796 PetscMPIInt size,rank,color; 2797 PetscInt *xadj,*adjncy,*oldranks; 2798 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2799 PetscInt i,j,local_size,threshold=0; 2800 PetscErrorCode ierr; 2801 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2802 PetscSubcomm subcomm; 2803 2804 PetscFunctionBegin; 2805 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2806 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2807 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2808 2809 /* Get info on mapping */ 2810 matis = (Mat_IS*)(mat->data); 2811 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2812 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2813 2814 /* build local CSR graph of subdomains' connectivity */ 2815 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2816 xadj[0] = 0; 2817 xadj[1] = PetscMax(n_neighs-1,0); 2818 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2819 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2820 2821 if (threshold) { 2822 PetscInt* count,min_threshold; 2823 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2824 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2825 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2826 for (j=0;j<n_shared[i];j++) { 2827 count[shared[i][j]] += 1; 2828 } 2829 } 2830 /* adapt threshold since we dont want to lose significant connections */ 2831 min_threshold = n_neighs; 2832 for (i=1;i<n_neighs;i++) { 2833 for (j=0;j<n_shared[i];j++) { 2834 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2835 } 2836 } 2837 threshold = PetscMax(min_threshold+1,threshold); 2838 xadj[1] = 0; 2839 for (i=1;i<n_neighs;i++) { 2840 for (j=0;j<n_shared[i];j++) { 2841 if (count[shared[i][j]] < threshold) { 2842 adjncy[xadj[1]] = neighs[i]; 2843 adjncy_wgt[xadj[1]] = n_shared[i]; 2844 xadj[1]++; 2845 break; 2846 } 2847 } 2848 } 2849 ierr = PetscFree(count);CHKERRQ(ierr); 2850 } else { 2851 if (xadj[1]) { 2852 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2853 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2854 } 2855 } 2856 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2857 if (use_square) { 2858 for (i=0;i<xadj[1];i++) { 2859 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2860 } 2861 } 2862 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2863 2864 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2865 2866 /* 2867 Restrict work on active processes only. 2868 */ 2869 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2870 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2871 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2872 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2873 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2874 if (color) { 2875 ierr = PetscFree(xadj);CHKERRQ(ierr); 2876 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2877 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2878 } else { 2879 PetscInt coarsening_ratio; 2880 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2881 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2882 prank = rank; 2883 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2884 /* 2885 for (i=0;i<size;i++) { 2886 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2887 } 2888 */ 2889 for (i=0;i<xadj[1];i++) { 2890 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2891 } 2892 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2893 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2894 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2895 2896 /* Partition */ 2897 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2898 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2899 if (use_vwgt) { 2900 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2901 v_wgt[0] = local_size; 2902 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2903 } 2904 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2905 coarsening_ratio = size/n_subdomains; 2906 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2907 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2908 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2909 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2910 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2911 2912 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2913 if (contiguous) { 2914 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2915 } else { 2916 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2917 } 2918 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2919 /* clean up */ 2920 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2921 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2922 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2923 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2924 } 2925 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2926 2927 /* assemble parallel IS for sends */ 2928 i = 1; 2929 if (color) i=0; 2930 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2931 2932 /* get back IS */ 2933 *is_sends = ranks_send_to; 2934 PetscFunctionReturn(0); 2935 } 2936 2937 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2938 2939 #undef __FUNCT__ 2940 #define __FUNCT__ "MatISSubassemble" 2941 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2942 { 2943 Mat local_mat; 2944 Mat_IS *matis; 2945 IS is_sends_internal; 2946 PetscInt rows,cols; 2947 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2948 PetscBool ismatis,isdense,destroy_mat; 2949 ISLocalToGlobalMapping l2gmap; 2950 PetscInt* l2gmap_indices; 2951 const PetscInt* is_indices; 2952 MatType new_local_type; 2953 /* buffers */ 2954 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2955 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2956 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2957 /* MPI */ 2958 MPI_Comm comm,comm_n; 2959 PetscSubcomm subcomm; 2960 PetscMPIInt n_sends,n_recvs,commsize; 2961 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2962 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2963 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2964 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2965 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2966 PetscErrorCode ierr; 2967 2968 PetscFunctionBegin; 2969 /* TODO: add missing checks */ 2970 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2971 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2972 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2973 PetscValidLogicalCollectiveInt(mat,nis,7); 2974 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2975 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2976 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2977 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2978 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2979 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2980 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2981 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2982 PetscInt mrows,mcols,mnrows,mncols; 2983 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2984 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2985 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2986 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2987 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2988 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2989 } 2990 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2991 PetscValidLogicalCollectiveInt(mat,bs,0); 2992 /* prepare IS for sending if not provided */ 2993 if (!is_sends) { 2994 PetscBool pcontig = PETSC_TRUE; 2995 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2996 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2997 } else { 2998 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2999 is_sends_internal = is_sends; 3000 } 3001 3002 /* get pointer of MATIS data */ 3003 matis = (Mat_IS*)mat->data; 3004 3005 /* get comm */ 3006 comm = PetscObjectComm((PetscObject)mat); 3007 3008 /* compute number of sends */ 3009 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3010 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3011 3012 /* compute number of receives */ 3013 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3014 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 3015 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3016 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3017 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3018 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3019 ierr = PetscFree(iflags);CHKERRQ(ierr); 3020 3021 /* restrict comm if requested */ 3022 subcomm = 0; 3023 destroy_mat = PETSC_FALSE; 3024 if (restrict_comm) { 3025 PetscMPIInt color,rank,subcommsize; 3026 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3027 color = 0; 3028 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3029 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3030 subcommsize = commsize - subcommsize; 3031 /* check if reuse has been requested */ 3032 if (reuse == MAT_REUSE_MATRIX) { 3033 if (*mat_n) { 3034 PetscMPIInt subcommsize2; 3035 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3036 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3037 comm_n = PetscObjectComm((PetscObject)*mat_n); 3038 } else { 3039 comm_n = PETSC_COMM_SELF; 3040 } 3041 } else { /* MAT_INITIAL_MATRIX */ 3042 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3043 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3044 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3045 comm_n = subcomm->comm; 3046 } 3047 /* flag to destroy *mat_n if not significative */ 3048 if (color) destroy_mat = PETSC_TRUE; 3049 } else { 3050 comm_n = comm; 3051 } 3052 3053 /* prepare send/receive buffers */ 3054 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 3055 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3056 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 3057 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3058 if (nis) { 3059 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3060 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3061 } 3062 3063 /* Get data from local matrices */ 3064 if (!isdense) { 3065 /* TODO: See below some guidelines on how to prepare the local buffers */ 3066 /* 3067 send_buffer_vals should contain the raw values of the local matrix 3068 send_buffer_idxs should contain: 3069 - MatType_PRIVATE type 3070 - PetscInt size_of_l2gmap 3071 - PetscInt global_row_indices[size_of_l2gmap] 3072 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3073 */ 3074 } else { 3075 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3076 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3077 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 3078 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3079 send_buffer_idxs[1] = i; 3080 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3081 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3082 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3083 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3084 for (i=0;i<n_sends;i++) { 3085 ilengths_vals[is_indices[i]] = len*len; 3086 ilengths_idxs[is_indices[i]] = len+2; 3087 } 3088 } 3089 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3090 /* additional is (if any) */ 3091 if (nis) { 3092 PetscMPIInt psum; 3093 PetscInt j; 3094 for (j=0,psum=0;j<nis;j++) { 3095 PetscInt plen; 3096 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3097 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3098 psum += len+1; /* indices + lenght */ 3099 } 3100 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3101 for (j=0,psum=0;j<nis;j++) { 3102 PetscInt plen; 3103 const PetscInt *is_array_idxs; 3104 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3105 send_buffer_idxs_is[psum] = plen; 3106 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3107 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3108 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3109 psum += plen+1; /* indices + lenght */ 3110 } 3111 for (i=0;i<n_sends;i++) { 3112 ilengths_idxs_is[is_indices[i]] = psum; 3113 } 3114 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3115 } 3116 3117 buf_size_idxs = 0; 3118 buf_size_vals = 0; 3119 buf_size_idxs_is = 0; 3120 for (i=0;i<n_recvs;i++) { 3121 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3122 buf_size_vals += (PetscInt)olengths_vals[i]; 3123 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3124 } 3125 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 3126 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 3127 ierr = PetscMalloc(buf_size_idxs_is*sizeof(PetscInt),&recv_buffer_idxs_is);CHKERRQ(ierr); 3128 3129 /* get new tags for clean communications */ 3130 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3131 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3133 3134 /* allocate for requests */ 3135 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 3136 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 3137 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs_is);CHKERRQ(ierr); 3138 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 3139 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 3140 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs_is);CHKERRQ(ierr); 3141 3142 /* communications */ 3143 ptr_idxs = recv_buffer_idxs; 3144 ptr_vals = recv_buffer_vals; 3145 ptr_idxs_is = recv_buffer_idxs_is; 3146 for (i=0;i<n_recvs;i++) { 3147 source_dest = onodes[i]; 3148 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3149 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3150 ptr_idxs += olengths_idxs[i]; 3151 ptr_vals += olengths_vals[i]; 3152 if (nis) { 3153 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3154 ptr_idxs_is += olengths_idxs_is[i]; 3155 } 3156 } 3157 for (i=0;i<n_sends;i++) { 3158 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3159 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3160 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3161 if (nis) { 3162 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3163 } 3164 } 3165 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3166 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3167 3168 /* assemble new l2g map */ 3169 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3170 ptr_idxs = recv_buffer_idxs; 3171 buf_size_idxs = 0; 3172 for (i=0;i<n_recvs;i++) { 3173 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3174 ptr_idxs += olengths_idxs[i]; 3175 } 3176 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 3177 ptr_idxs = recv_buffer_idxs; 3178 buf_size_idxs = 0; 3179 for (i=0;i<n_recvs;i++) { 3180 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3181 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3182 ptr_idxs += olengths_idxs[i]; 3183 } 3184 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3185 ierr = ISLocalToGlobalMappingCreate(comm_n,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3186 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3187 3188 /* infer new local matrix type from received local matrices type */ 3189 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3190 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3191 if (n_recvs) { 3192 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3193 ptr_idxs = recv_buffer_idxs; 3194 for (i=0;i<n_recvs;i++) { 3195 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3196 new_local_type_private = MATAIJ_PRIVATE; 3197 break; 3198 } 3199 ptr_idxs += olengths_idxs[i]; 3200 } 3201 switch (new_local_type_private) { 3202 case MATDENSE_PRIVATE: 3203 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3204 new_local_type = MATSEQAIJ; 3205 bs = 1; 3206 } else { /* if I receive only 1 dense matrix */ 3207 new_local_type = MATSEQDENSE; 3208 bs = 1; 3209 } 3210 break; 3211 case MATAIJ_PRIVATE: 3212 new_local_type = MATSEQAIJ; 3213 bs = 1; 3214 break; 3215 case MATBAIJ_PRIVATE: 3216 new_local_type = MATSEQBAIJ; 3217 break; 3218 case MATSBAIJ_PRIVATE: 3219 new_local_type = MATSEQSBAIJ; 3220 break; 3221 default: 3222 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3223 break; 3224 } 3225 } else { /* by default, new_local_type is seqdense */ 3226 new_local_type = MATSEQDENSE; 3227 bs = 1; 3228 } 3229 3230 /* create MATIS object if needed */ 3231 if (reuse == MAT_INITIAL_MATRIX) { 3232 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3233 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3234 } else { 3235 /* it also destroys the local matrices */ 3236 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3237 } 3238 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3239 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3240 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3241 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3242 3243 /* set values */ 3244 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3245 ptr_vals = recv_buffer_vals; 3246 ptr_idxs = recv_buffer_idxs; 3247 for (i=0;i<n_recvs;i++) { 3248 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3249 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3250 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3251 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3252 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3253 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3254 } else { 3255 /* TODO */ 3256 } 3257 ptr_idxs += olengths_idxs[i]; 3258 ptr_vals += olengths_vals[i]; 3259 } 3260 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3261 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3262 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3263 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 3265 #if 1 3266 if (!restrict_comm) { /* check */ 3267 Vec lvec,rvec; 3268 PetscReal infty_error; 3269 3270 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3271 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3272 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3273 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3274 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3275 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3276 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3277 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3278 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3279 } 3280 #endif 3281 3282 /* assemble new additional is (if any) */ 3283 if (nis) { 3284 PetscInt **temp_idxs,*count_is,j,psum; 3285 3286 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3287 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3288 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3289 ptr_idxs = recv_buffer_idxs_is; 3290 psum = 0; 3291 for (i=0;i<n_recvs;i++) { 3292 for (j=0;j<nis;j++) { 3293 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3294 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3295 psum += plen; 3296 ptr_idxs += plen+1; /* shift pointer to received data */ 3297 } 3298 } 3299 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3300 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3301 for (i=1;i<nis;i++) { 3302 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3303 } 3304 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3305 ptr_idxs = recv_buffer_idxs_is; 3306 for (i=0;i<n_recvs;i++) { 3307 for (j=0;j<nis;j++) { 3308 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3309 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3310 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3311 ptr_idxs += plen+1; /* shift pointer to received data */ 3312 } 3313 } 3314 for (i=0;i<nis;i++) { 3315 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3316 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3317 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3318 } 3319 ierr = PetscFree(count_is);CHKERRQ(ierr); 3320 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3321 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3322 } 3323 /* free workspace */ 3324 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3325 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3326 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3327 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3328 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3329 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3330 if (isdense) { 3331 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3332 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3333 } else { 3334 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3335 } 3336 if (nis) { 3337 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3338 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3339 } 3340 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3341 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3342 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3343 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3344 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3345 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3346 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3347 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3348 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3349 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3350 ierr = PetscFree(onodes);CHKERRQ(ierr); 3351 if (nis) { 3352 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3353 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3354 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3355 } 3356 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3357 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3358 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3359 for (i=0;i<nis;i++) { 3360 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3361 } 3362 } 3363 PetscFunctionReturn(0); 3364 } 3365 3366 /* temporary hack into ksp private data structure */ 3367 #include <petsc-private/kspimpl.h> 3368 3369 #undef __FUNCT__ 3370 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3371 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3372 { 3373 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3374 PC_IS *pcis = (PC_IS*)pc->data; 3375 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3376 MatNullSpace CoarseNullSpace=NULL; 3377 ISLocalToGlobalMapping coarse_islg; 3378 IS coarse_is; 3379 PetscInt max_it; 3380 PetscInt im_active=-1,active_procs=-1; 3381 PC pc_temp; 3382 PCType coarse_pc_type; 3383 KSPType coarse_ksp_type; 3384 PetscBool multilevel_requested,multilevel_allowed; 3385 PetscBool setsym,issym,isherm,ispreonly,isbddc,isnn,coarse_reuse; 3386 MatStructure matstruct; 3387 PetscErrorCode ierr; 3388 3389 PetscFunctionBegin; 3390 /* Assign global numbering to coarse dofs */ 3391 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3392 PetscInt ocoarse_size; 3393 ocoarse_size = pcbddc->coarse_size; 3394 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3395 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3396 /* see if we can avoid some work */ 3397 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3398 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3399 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3400 coarse_reuse = PETSC_FALSE; 3401 } else { /* we can safely reuse already computed coarse matrix */ 3402 coarse_reuse = PETSC_TRUE; 3403 } 3404 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3405 coarse_reuse = PETSC_FALSE; 3406 } 3407 /* reset any subassembling information */ 3408 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3409 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3410 coarse_reuse = PETSC_TRUE; 3411 } 3412 3413 /* test if we can go multilevel */ 3414 multilevel_allowed = PETSC_FALSE; 3415 multilevel_requested = PETSC_FALSE; 3416 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3417 if (multilevel_requested) { 3418 /* count "active processes" */ 3419 im_active = !!(pcis->n); 3420 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3421 if (active_procs/pcbddc->coarsening_ratio < 2) { 3422 multilevel_allowed = PETSC_FALSE; 3423 } else { 3424 multilevel_allowed = PETSC_TRUE; 3425 } 3426 } 3427 3428 /* creates temporary l2gmap and IS for coarse indexes */ 3429 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3430 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3431 3432 /* creates temporary MATIS object for coarse matrix */ 3433 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3434 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3435 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3436 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3437 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3438 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3439 3440 /* set defaults for coarse KSP and PC */ 3441 if (multilevel_allowed) { 3442 coarse_ksp_type = KSPRICHARDSON; 3443 coarse_pc_type = PCBDDC; 3444 } else { 3445 coarse_ksp_type = KSPPREONLY; 3446 coarse_pc_type = PCREDUNDANT; 3447 } 3448 3449 /* create the coarse KSP object only once with defaults */ 3450 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3451 if (!pcbddc->coarse_ksp) { 3452 char prefix[256],str_level[16]; 3453 size_t len; 3454 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3455 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3456 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3457 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is,matstruct);CHKERRQ(ierr); 3458 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3459 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3460 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3461 /* prefix */ 3462 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3463 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3464 if (!pcbddc->current_level) { 3465 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3466 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3467 } else { 3468 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3469 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3470 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3471 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3472 *(prefix+len)='\0'; 3473 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3474 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3475 } 3476 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3477 /* allow user customization */ 3478 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3479 } 3480 3481 /* get some info after set from options */ 3482 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3483 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3484 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3485 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3486 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3487 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3488 isbddc = PETSC_FALSE; 3489 } 3490 3491 /* propagate BDDC info to the next level */ 3492 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3493 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3494 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3495 if (isbddc) { /* protects from unneded computations */ 3496 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3497 const PetscInt *idxs; 3498 IS *c_isfordofs,c_isneumann; 3499 ISLocalToGlobalMapping tmap; 3500 3501 /* create map between primal indices (in local representative ordering) and local subdomain numbering */ 3502 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3503 /* allocate space for temporary storage */ 3504 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3505 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3506 /* dofs splitting */ 3507 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&c_isfordofs);CHKERRQ(ierr); 3508 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3509 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3510 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3511 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3512 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3513 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3514 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3515 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&c_isfordofs[i]);CHKERRQ(ierr); 3516 /* ierr = ISView(c_isfordofs[i],0);CHKERRQ(ierr); */ 3517 } 3518 ierr = PCBDDCSetDofsSplitting(pc_temp,pcbddc->n_ISForDofsLocal,c_isfordofs);CHKERRQ(ierr); 3519 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3520 ierr = ISDestroy(&c_isfordofs[i]);CHKERRQ(ierr); 3521 } 3522 ierr = PetscFree(c_isfordofs);CHKERRQ(ierr); 3523 /* neumann boundaries */ 3524 if (pcbddc->NeumannBoundariesLocal) { 3525 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3526 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3527 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3528 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3529 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3530 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3531 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&c_isneumann);CHKERRQ(ierr); 3532 /* ierr = ISView(c_isneumann,0);CHKERRQ(ierr); */ 3533 ierr = PCBDDCSetNeumannBoundaries(pc_temp,c_isneumann);CHKERRQ(ierr); 3534 } 3535 ierr = ISDestroy(&c_isneumann);CHKERRQ(ierr); 3536 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3537 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3538 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3539 } 3540 3541 /* destroy no longer needed map */ 3542 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3543 3544 /* assemble coarse matrix */ 3545 if (isbddc || isnn) { 3546 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3547 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3548 if (pcbddc->dbg_flag) { 3549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3550 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3551 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 3552 } 3553 } 3554 if (coarse_reuse) { 3555 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3556 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3557 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,MAT_REUSE_MATRIX,&coarse_mat,0,NULL);CHKERRQ(ierr); 3558 } else { 3559 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat,0,NULL);CHKERRQ(ierr); 3560 } 3561 } else { 3562 if (coarse_reuse) { 3563 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3564 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3565 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3566 } else { 3567 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3568 } 3569 } 3570 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3571 3572 /* create local to global scatters for coarse problem */ 3573 if (pcbddc->new_primal_space) { 3574 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3575 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3576 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3577 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3578 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3579 } 3580 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3581 3582 /* propagate symmetry info to coarse matrix */ 3583 issym = PETSC_FALSE; 3584 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3585 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3586 isherm = PETSC_FALSE; 3587 ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr); 3588 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3589 3590 /* temporary disabled code */ 3591 #if 0 3592 /* Compute coarse null space (special handling by BDDC only) */ 3593 if (pcbddc->NullSpace) { 3594 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3595 if (isbddc) { 3596 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3597 } else { 3598 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3599 } 3600 } 3601 #endif 3602 3603 /* set operators */ 3604 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3605 3606 /* additional KSP customization */ 3607 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3608 if (max_it < 5) { 3609 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3610 } 3611 3612 /* print some info if requested */ 3613 if (pcbddc->dbg_flag) { 3614 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3615 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3616 if (!multilevel_allowed) { 3617 if (multilevel_requested) { 3618 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3619 } else if (pcbddc->max_levels) { 3620 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3621 } 3622 } 3623 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3624 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3625 } 3626 3627 /* setup coarse ksp */ 3628 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3629 /* hack -> use a DM on coarse_ksp to remove this hack */ 3630 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_rhs),&((pcbddc->coarse_ksp)->vec_sol));CHKERRQ(ierr); 3631 3632 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3633 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3634 if (pcbddc->dbg_flag || !ispreonly) { 3635 KSP check_ksp; 3636 KSPType check_ksp_type; 3637 PC check_pc; 3638 Vec check_vec; 3639 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3640 PetscInt its; 3641 PetscBool compute; 3642 3643 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3644 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3645 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3646 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3647 if (ispreonly) { 3648 check_ksp_type = KSPPREONLY; 3649 compute = PETSC_FALSE; 3650 } else { 3651 check_ksp_type = KSPGMRES; 3652 compute = PETSC_TRUE; 3653 } 3654 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3655 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3656 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3657 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3658 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3659 /* create random vec */ 3660 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3661 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3662 if (CoarseNullSpace) { 3663 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3664 } 3665 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3666 /* solve coarse problem */ 3667 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3668 if (CoarseNullSpace) { 3669 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3670 } 3671 /* set eigenvalue estimation if preonly has not been requested */ 3672 if (!ispreonly) { 3673 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3674 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Coarse problem eigenvalues: %1.6e %1.6e\n",lambda_min,lambda_max);CHKERRQ(ierr); */ 3675 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3676 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3677 } 3678 /* check coarse problem residual error */ 3679 if (pcbddc->dbg_flag) { 3680 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3681 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3682 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3683 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3684 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3685 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3686 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3687 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3688 if (!ispreonly) { 3689 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3690 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3691 } 3692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3693 } 3694 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3695 } 3696 3697 /* print additional info */ 3698 if (pcbddc->dbg_flag) { 3699 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3700 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3701 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3702 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3703 } 3704 3705 /* free memory */ 3706 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3707 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3708 PetscFunctionReturn(0); 3709 } 3710 3711 #undef __FUNCT__ 3712 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3713 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3714 { 3715 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3716 PC_IS* pcis = (PC_IS*)pc->data; 3717 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3718 PetscInt i,coarse_size; 3719 PetscInt *local_primal_indices; 3720 PetscErrorCode ierr; 3721 3722 PetscFunctionBegin; 3723 /* Compute global number of coarse dofs */ 3724 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3725 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3726 } 3727 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3728 3729 /* check numbering */ 3730 if (pcbddc->dbg_flag) { 3731 PetscScalar coarsesum,*array; 3732 3733 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3734 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3735 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3736 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3737 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3738 for (i=0;i<pcbddc->local_primal_size;i++) { 3739 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3740 } 3741 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3742 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3743 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3744 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3745 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3746 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3747 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3748 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3749 for (i=0;i<pcis->n;i++) { 3750 if (array[i] == 1.0) { 3751 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3752 } 3753 } 3754 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3755 for (i=0;i<pcis->n;i++) { 3756 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3757 } 3758 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3759 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3760 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3761 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3762 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3763 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3764 if (pcbddc->dbg_flag > 1) { 3765 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3766 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3767 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3768 for (i=0;i<pcbddc->local_primal_size;i++) { 3769 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3770 } 3771 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3772 } 3773 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3774 } 3775 /* get back data */ 3776 *coarse_size_n = coarse_size; 3777 *local_primal_indices_n = local_primal_indices; 3778 PetscFunctionReturn(0); 3779 } 3780 3781 #undef __FUNCT__ 3782 #define __FUNCT__ "PCBDDCGlobalToLocal" 3783 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3784 { 3785 PC_IS* pcis = (PC_IS*)pc->data; 3786 IS localis_t; 3787 PetscInt i,lsize,*idxs; 3788 PetscScalar *vals; 3789 PetscErrorCode ierr; 3790 3791 PetscFunctionBegin; 3792 /* get dirichlet indices in local ordering exploiting local to global map */ 3793 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3794 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3795 for (i=0;i<lsize;i++) vals[i] = 1.0; 3796 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3797 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3798 if (idxs) { /* multilevel guard */ 3799 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3800 } 3801 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3802 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3803 ierr = PetscFree(vals);CHKERRQ(ierr); 3804 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3805 /* now compute dirichlet set in local ordering */ 3806 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3807 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3808 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3809 for (i=0,lsize=0;i<pcis->n;i++) { 3810 if (vals[i] == 1.0) { 3811 lsize++; 3812 } 3813 } 3814 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3815 for (i=0,lsize=0;i<pcis->n;i++) { 3816 if (vals[i] == 1.0) { 3817 idxs[lsize++] = i; 3818 } 3819 } 3820 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3821 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3822 *localis = localis_t; 3823 PetscFunctionReturn(0); 3824 } 3825