1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCSetUpSolvers" 10 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscScalar *coarse_submat_vals; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 /* Compute matrix after change of basis and extract local submatrices */ 18 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D */ 21 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 22 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 23 24 /* Setup local solvers ksp_D and ksp_R */ 25 /* PCBDDCSetUpLocalScatters should be called first! */ 26 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 27 28 /* Change global null space passed in by the user if change of basis has been requested */ 29 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 30 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 31 } 32 33 /* 34 Setup local correction and local part of coarse basis. 35 Gives back the dense local part of the coarse matrix in column major ordering 36 */ 37 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 38 39 /* Compute total number of coarse nodes and setup coarse solver */ 40 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 41 42 /* free */ 43 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 #undef __FUNCT__ 48 #define __FUNCT__ "PCBDDCResetCustomization" 49 PetscErrorCode PCBDDCResetCustomization(PC pc) 50 { 51 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 61 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 62 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 63 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 64 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 65 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCResetTopography" 71 PetscErrorCode PCBDDCResetTopography(PC pc) 72 { 73 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 78 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 79 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 80 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "PCBDDCResetSolvers" 86 PetscErrorCode PCBDDCResetSolvers(PC pc) 87 { 88 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 93 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 101 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 109 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 110 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 111 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 112 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 113 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 114 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 115 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 122 { 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PC_IS *pcis = (PC_IS*)pc->data; 125 VecType impVecType; 126 PetscInt n_constraints,n_R,old_size; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 if (!pcbddc->ConstraintMatrix) { 131 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 132 } 133 /* get sizes */ 134 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 135 n_R = pcis->n-pcbddc->n_actual_vertices; 136 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 137 /* local work vectors (try to avoid unneeded work)*/ 138 /* R nodes */ 139 old_size = -1; 140 if (pcbddc->vec1_R) { 141 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 142 } 143 if (n_R != old_size) { 144 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 146 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 147 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 148 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 149 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 150 } 151 /* local primal dofs */ 152 old_size = -1; 153 if (pcbddc->vec1_P) { 154 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 155 } 156 if (pcbddc->local_primal_size != old_size) { 157 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 158 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 159 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 160 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 161 } 162 /* local explicit constraints */ 163 old_size = -1; 164 if (pcbddc->vec1_C) { 165 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 166 } 167 if (n_constraints && n_constraints != old_size) { 168 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 169 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 170 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 171 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCBDDCSetUpCorrection" 178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 179 { 180 PetscErrorCode ierr; 181 /* pointers to pcis and pcbddc */ 182 PC_IS* pcis = (PC_IS*)pc->data; 183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 184 /* submatrices of local problem */ 185 Mat A_RV,A_VR,A_VV; 186 /* working matrices */ 187 Mat M1,M2,M3,C_CR; 188 /* working vectors */ 189 Vec vec1_C,vec2_C,vec1_V,vec2_V; 190 /* additional working stuff */ 191 IS is_aux; 192 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 193 const PetscScalar *array,*row_cmat_values; 194 const PetscInt *row_cmat_indices,*idx_R_local; 195 PetscInt *idx_V_B,*auxindices; 196 PetscInt n_vertices,n_constraints,size_of_constraint; 197 PetscInt i,j,n_R,n_D,n_B; 198 PetscBool unsymmetric_check; 199 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 200 MatType impMatType; 201 /* some shortcuts to scalars */ 202 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 203 /* for debugging purposes */ 204 PetscReal *coarsefunctions_errors,*constraints_errors; 205 206 PetscFunctionBegin; 207 /* get number of vertices (corners plus constraints with change of basis) 208 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 209 n_vertices = pcbddc->n_actual_vertices; 210 n_constraints = pcbddc->local_primal_size-n_vertices; 211 /* Set Non-overlapping dimensions */ 212 n_B = pcis->n_B; n_D = pcis->n - n_B; 213 n_R = pcis->n-n_vertices; 214 215 /* Set types for local objects needed by BDDC precondtioner */ 216 impMatType = MATSEQDENSE; 217 218 /* Allocating some extra storage just to be safe */ 219 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 220 for (i=0;i<pcis->n;i++) auxindices[i]=i; 221 222 /* vertices in boundary numbering */ 223 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 224 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 225 if (i != n_vertices) { 226 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 227 } 228 229 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 230 if (n_constraints) { 231 /* see if we can save some allocations */ 232 if (pcbddc->local_auxmat2) { 233 PetscInt on_R,on_constraints; 234 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 235 if (on_R != n_R || on_constraints != n_constraints) { 236 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 237 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 238 } 239 } 240 /* work vectors */ 241 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 242 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 243 /* auxiliary matrices */ 244 if (!pcbddc->local_auxmat2) { 245 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 246 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 247 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 249 } 250 251 /* Extract constraints on R nodes: C_{CR} */ 252 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 253 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 254 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 255 256 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 257 for (i=0;i<n_constraints;i++) { 258 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 259 /* Get row of constraint matrix in R numbering */ 260 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 262 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 263 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 264 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 265 /* Solve for row of constraint matrix in R numbering */ 266 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 267 /* Set values in local_auxmat2 */ 268 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 270 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 271 } 272 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 275 276 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 277 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 278 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 279 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 280 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 281 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 282 ierr = MatSetUp(M1);CHKERRQ(ierr); 283 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 284 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 285 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 286 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 287 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 288 ierr = MatDestroy(&M2);CHKERRQ(ierr); 289 ierr = MatDestroy(&M3);CHKERRQ(ierr); 290 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 291 if (!pcbddc->local_auxmat1) { 292 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } else { 294 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 295 } 296 } 297 298 /* Get submatrices from subdomain matrix */ 299 if (n_vertices) { 300 PetscInt ibs,mbs; 301 PetscBool issbaij; 302 Mat newmat; 303 304 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 305 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 306 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 307 if (ibs != mbs) { /* need to convert to SEQAIJ */ 308 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 310 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 311 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 313 } else { 314 /* this is safe */ 315 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 316 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 317 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 318 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 319 /* which of the two approaches is faster? */ 320 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 322 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 324 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 325 } else { 326 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 327 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 328 } 329 } 330 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 331 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 332 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 333 } 334 335 /* Matrix of coarse basis functions (local) */ 336 if (pcbddc->coarse_phi_B) { 337 PetscInt on_B,on_primal; 338 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 339 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 340 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 341 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 342 } 343 } 344 if (pcbddc->coarse_phi_D) { 345 PetscInt on_D,on_primal; 346 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 347 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 348 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 349 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 350 } 351 } 352 if (!pcbddc->coarse_phi_B) { 353 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 355 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 356 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 357 } 358 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 359 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 361 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 362 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 363 } 364 365 if (pcbddc->dbg_flag) { 366 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 367 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 368 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 369 } 370 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 371 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 372 373 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 374 375 /* vertices */ 376 for (i=0;i<n_vertices;i++) { 377 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 378 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 379 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 380 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 381 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 382 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 383 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 384 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 387 if (n_constraints) { 388 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 389 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 390 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 391 } 392 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 393 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 394 395 /* Set values in coarse basis function and subdomain part of coarse_mat */ 396 /* coarse basis functions */ 397 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 398 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 399 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 402 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 403 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 404 if (pcbddc->switch_static || pcbddc->dbg_flag) { 405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 } 411 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 412 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 415 if (n_constraints) { 416 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 419 } 420 421 /* check */ 422 if (pcbddc->dbg_flag) { 423 /* assemble subdomain vector on local nodes */ 424 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 425 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 428 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 430 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 431 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 432 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 433 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 if (n_constraints) { 437 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 438 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 } 441 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 442 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 443 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 444 /* check saddle point solution */ 445 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 446 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 447 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 448 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 449 /* shift by the identity matrix */ 450 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 451 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 453 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 454 } 455 } 456 457 /* constraints */ 458 for (i=0;i<n_constraints;i++) { 459 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 460 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 462 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 463 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 464 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 465 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 466 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 467 if (n_vertices) { 468 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 469 } 470 /* Set values in coarse basis function and subdomain part of coarse_mat */ 471 /* coarse basis functions */ 472 j = i+n_vertices; /* don't touch this! */ 473 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 474 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 475 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 477 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 478 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 481 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 482 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 483 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 484 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 485 } 486 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 487 if (n_vertices) { 488 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 491 } 492 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 493 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 494 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 495 496 if (pcbddc->dbg_flag) { 497 /* assemble subdomain vector on nodes */ 498 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 499 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 500 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 501 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 502 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 503 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 504 /* assemble subdomain vector of lagrange multipliers */ 505 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 506 if (n_vertices) { 507 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 508 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 510 } 511 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 512 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 514 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 515 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 516 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 517 /* check saddle point solution */ 518 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 519 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 520 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 521 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 522 /* shift by the identity matrix */ 523 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 524 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 525 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 526 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 527 } 528 } 529 /* call assembling routines for local coarse basis */ 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 if (pcbddc->switch_static || pcbddc->dbg_flag) { 533 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 535 } 536 537 /* compute other basis functions for non-symmetric problems */ 538 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 539 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 540 PetscBool setsym; 541 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 542 if (!setsym) pcbddc->issym = PETSC_FALSE; 543 } 544 545 if (!pcbddc->issym) { 546 if (!pcbddc->coarse_psi_B) { 547 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 548 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 549 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 550 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 551 } 552 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 553 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 554 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 555 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 556 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 557 } 558 for (i=0;i<pcbddc->local_primal_size;i++) { 559 if (n_constraints) { 560 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 561 for (j=0;j<n_constraints;j++) { 562 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 563 } 564 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 565 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 566 } 567 if (i<n_vertices) { 568 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 569 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 570 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 571 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 572 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 573 if (n_constraints) { 574 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 575 } 576 } else { 577 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 578 } 579 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 580 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 581 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 583 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 584 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 585 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 586 if (i<n_vertices) { 587 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 588 } 589 if (pcbddc->switch_static || pcbddc->dbg_flag) { 590 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 591 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 592 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 593 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 594 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 595 } 596 597 if (pcbddc->dbg_flag) { 598 /* assemble subdomain vector on nodes */ 599 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 600 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 601 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 602 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 603 if (i<n_vertices) { 604 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 605 } 606 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 607 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 608 /* assemble subdomain vector of lagrange multipliers */ 609 for (j=0;j<pcbddc->local_primal_size;j++) { 610 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 611 } 612 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 614 /* check saddle point solution */ 615 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 616 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 617 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 618 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 619 /* shift by the identity matrix */ 620 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 621 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 622 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 623 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 624 } 625 } 626 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 627 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 628 if (pcbddc->switch_static || pcbddc->dbg_flag) { 629 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 630 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 631 } 632 unsymmetric_check = PETSC_TRUE; 633 } else { /* take references to already computed coarse basis */ 634 unsymmetric_check = PETSC_FALSE; 635 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 636 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 637 if (pcbddc->coarse_phi_D) { 638 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 639 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 640 } 641 } 642 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 643 /* Checking coarse_sub_mat and coarse basis functios */ 644 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 645 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 646 if (pcbddc->dbg_flag) { 647 Mat coarse_sub_mat; 648 Mat AUXMAT,TM1,TM2,TM3,TM4; 649 Mat coarse_phi_D,coarse_phi_B; 650 Mat coarse_psi_D,coarse_psi_B; 651 Mat A_II,A_BB,A_IB,A_BI; 652 MatType checkmattype=MATSEQAIJ; 653 PetscReal real_value; 654 655 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 656 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 657 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 658 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 659 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 660 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 661 if (unsymmetric_check) { 662 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 663 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 664 } 665 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 666 667 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 668 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 669 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 670 if (unsymmetric_check) { 671 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 675 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 676 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 677 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 } else { 684 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 685 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 686 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 687 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 688 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 689 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 690 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 691 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 692 } 693 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 694 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 695 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 696 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 697 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 698 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 699 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 704 for (i=0;i<pcbddc->local_primal_size;i++) { 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 706 } 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 708 for (i=0;i<pcbddc->local_primal_size;i++) { 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 710 } 711 if (unsymmetric_check) { 712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 713 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 715 } 716 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 717 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 719 } 720 } 721 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 722 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 723 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 724 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 725 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 726 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 727 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 728 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 729 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 730 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 731 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 732 if (unsymmetric_check) { 733 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 734 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 735 } 736 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 737 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 738 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 739 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 740 } 741 /* free memory */ 742 if (n_vertices) { 743 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 744 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 745 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 746 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 747 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 748 } 749 if (n_constraints) { 750 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 751 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 752 ierr = MatDestroy(&M1);CHKERRQ(ierr); 753 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 754 } 755 ierr = PetscFree(auxindices);CHKERRQ(ierr); 756 /* get back data */ 757 *coarse_submat_vals_n = coarse_submat_vals; 758 PetscFunctionReturn(0); 759 } 760 761 #undef __FUNCT__ 762 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 763 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 764 { 765 PC_IS* pcis = (PC_IS*)(pc->data); 766 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 767 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 768 PetscBool issbaij,isseqaij; 769 /* manage repeated solves */ 770 MatReuse reuse; 771 PetscErrorCode ierr; 772 773 PetscFunctionBegin; 774 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 775 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 776 } 777 /* get mat flags */ 778 reuse = MAT_INITIAL_MATRIX; 779 if (pc->setupcalled) { 780 if (pc->flag == SAME_NONZERO_PATTERN) { 781 reuse = MAT_REUSE_MATRIX; 782 } else { 783 reuse = MAT_INITIAL_MATRIX; 784 } 785 } 786 if (reuse == MAT_INITIAL_MATRIX) { 787 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 788 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 789 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 790 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 791 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 792 } 793 794 /* transform local matrices if needed */ 795 if (pcbddc->ChangeOfBasisMatrix) { 796 Mat *change_mat_all; 797 IS is_local,is_global; 798 PetscBool sorted; 799 PetscInt *idxs_perm; 800 801 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr); 802 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 803 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 804 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 805 if (!sorted) { 806 const PetscInt *idxs; 807 PetscInt *idxs_sorted,i; 808 809 ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr); 810 ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr); 811 for (i=0;i<pcis->n;i++) { 812 idxs_perm[i] = i; 813 } 814 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 815 ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr); 816 for (i=0;i<pcis->n;i++) { 817 idxs_sorted[i] = idxs[idxs_perm[i]]; 818 } 819 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 820 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 821 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 822 } 823 824 /* get change of basis on the whole set of local dofs */ 825 ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 826 827 if (!sorted) { 828 Mat new_mat; 829 IS is_perm; 830 PetscInt *idxs,i; 831 832 ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr); 833 for (i=0;i<pcis->n;i++) { 834 idxs[idxs_perm[i]] = i; 835 } 836 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 837 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 838 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 839 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 840 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 841 change_mat_all[0] = new_mat; 842 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 843 } 844 845 /* check */ 846 if (pcbddc->dbg_flag) { 847 Vec x,x_change; 848 PetscReal error; 849 850 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 851 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 852 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 853 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 854 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 855 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856 ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 857 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 858 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 859 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 860 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 861 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 862 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 863 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 864 ierr = VecDestroy(&x);CHKERRQ(ierr); 865 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 866 } 867 868 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 869 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 870 if (isseqaij) { 871 ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 872 } else { 873 Mat work_mat; 874 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 875 ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 876 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 877 } 878 /* 879 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 880 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 881 */ 882 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 883 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 884 } else { 885 /* without change of basis, the local matrix is unchanged */ 886 if (!pcbddc->local_mat) { 887 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 888 pcbddc->local_mat = matis->A; 889 } 890 } 891 892 /* get submatrices */ 893 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 894 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 895 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 896 if (!issbaij) { 897 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 898 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 899 } else { 900 Mat newmat; 901 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 902 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 903 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 904 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 905 } 906 PetscFunctionReturn(0); 907 } 908 909 #undef __FUNCT__ 910 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 911 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 912 { 913 PC_IS* pcis = (PC_IS*)(pc->data); 914 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 915 IS is_aux1,is_aux2; 916 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 917 PetscInt n_vertices,i,j,n_R,n_D,n_B; 918 PetscInt vbs,bs; 919 PetscBT bitmask; 920 PetscErrorCode ierr; 921 922 PetscFunctionBegin; 923 /* 924 No need to setup local scatters if 925 - primal space is unchanged 926 AND 927 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 928 AND 929 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 930 */ 931 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 932 PetscFunctionReturn(0); 933 } 934 /* destroy old objects */ 935 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 936 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 937 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 938 /* Set Non-overlapping dimensions */ 939 n_B = pcis->n_B; n_D = pcis->n - n_B; 940 n_vertices = pcbddc->n_actual_vertices; 941 /* create auxiliary bitmask */ 942 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 943 for (i=0;i<n_vertices;i++) { 944 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 945 } 946 947 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 948 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 949 for (i=0, n_R=0; i<pcis->n; i++) { 950 if (!PetscBTLookup(bitmask,i)) { 951 idx_R_local[n_R] = i; 952 n_R++; 953 } 954 } 955 956 /* Block code */ 957 vbs = 1; 958 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 959 if (bs>1 && !(n_vertices%bs)) { 960 PetscBool is_blocked = PETSC_TRUE; 961 PetscInt *vary; 962 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 963 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 964 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 965 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 966 for (i=0; i<n_vertices; i++) { 967 if (vary[i]!=0 && vary[i]!=bs) { 968 is_blocked = PETSC_FALSE; 969 break; 970 } 971 } 972 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 973 vbs = bs; 974 for (i=0;i<n_R/vbs;i++) { 975 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 976 } 977 } 978 ierr = PetscFree(vary);CHKERRQ(ierr); 979 } 980 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 981 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 982 983 /* print some info if requested */ 984 if (pcbddc->dbg_flag) { 985 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 986 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 987 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 993 } 994 995 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 996 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 997 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 998 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 999 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1000 for (i=0; i<n_D; i++) { 1001 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1002 } 1003 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1004 for (i=0, j=0; i<n_R; i++) { 1005 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1006 aux_array1[j++] = i; 1007 } 1008 } 1009 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1010 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1011 for (i=0, j=0; i<n_B; i++) { 1012 if (!PetscBTLookup(bitmask,is_indices[i])) { 1013 aux_array2[j++] = i; 1014 } 1015 } 1016 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1017 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1018 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1019 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1020 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1021 1022 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1023 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1024 for (i=0, j=0; i<n_R; i++) { 1025 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1026 aux_array1[j++] = i; 1027 } 1028 } 1029 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1030 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1031 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1032 } 1033 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1034 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1035 PetscFunctionReturn(0); 1036 } 1037 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1041 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1042 { 1043 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1044 PC_IS *pcis = (PC_IS*)pc->data; 1045 PC pc_temp; 1046 Mat A_RR; 1047 MatReuse reuse; 1048 PetscScalar m_one = -1.0; 1049 PetscReal value; 1050 PetscInt n_D,n_R,ibs,mbs; 1051 PetscBool use_exact,use_exact_reduced,issbaij; 1052 PetscErrorCode ierr; 1053 /* prefixes stuff */ 1054 char dir_prefix[256],neu_prefix[256],str_level[16]; 1055 size_t len; 1056 1057 PetscFunctionBegin; 1058 1059 /* compute prefixes */ 1060 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1061 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1062 if (!pcbddc->current_level) { 1063 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1064 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1065 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1066 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1067 } else { 1068 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1069 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1070 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1071 len -= 15; /* remove "pc_bddc_coarse_" */ 1072 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1073 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1074 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1075 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1076 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1077 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1078 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1079 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1080 } 1081 1082 /* DIRICHLET PROBLEM */ 1083 /* Matrix for Dirichlet problem is pcis->A_II */ 1084 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1085 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1086 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1087 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1088 /* default */ 1089 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1090 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1091 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1092 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1093 if (issbaij) { 1094 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1095 } else { 1096 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1097 } 1098 /* Allow user's customization */ 1099 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1100 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1101 } 1102 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1103 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1104 if (!n_D) { 1105 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1106 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1107 } 1108 /* Set Up KSP for Dirichlet problem of BDDC */ 1109 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1110 /* set ksp_D into pcis data */ 1111 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1112 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1113 pcis->ksp_D = pcbddc->ksp_D; 1114 1115 /* NEUMANN PROBLEM */ 1116 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1117 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1118 if (pcbddc->ksp_R) { /* already created ksp */ 1119 PetscInt nn_R; 1120 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1121 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1122 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1123 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1124 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1125 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1126 reuse = MAT_INITIAL_MATRIX; 1127 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1128 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1129 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1130 reuse = MAT_INITIAL_MATRIX; 1131 } else { /* safe to reuse the matrix */ 1132 reuse = MAT_REUSE_MATRIX; 1133 } 1134 } 1135 /* last check */ 1136 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1137 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1138 reuse = MAT_INITIAL_MATRIX; 1139 } 1140 } else { /* first time, so we need to create the matrix */ 1141 reuse = MAT_INITIAL_MATRIX; 1142 } 1143 /* extract A_RR */ 1144 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1145 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1146 if (ibs != mbs) { 1147 Mat newmat; 1148 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1149 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1150 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1151 } else { 1152 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1153 } 1154 if (!pcbddc->ksp_R) { /* create object if not present */ 1155 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1156 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1157 /* default */ 1158 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1159 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1160 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1161 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1162 if (issbaij) { 1163 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1164 } else { 1165 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1166 } 1167 /* Allow user's customization */ 1168 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1169 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1170 } 1171 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1172 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1173 if (!n_R) { 1174 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1175 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1176 } 1177 /* Set Up KSP for Neumann problem of BDDC */ 1178 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1179 1180 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1181 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1182 /* Dirichlet */ 1183 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1184 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1185 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1186 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1187 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1188 /* need to be adapted? */ 1189 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1190 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1191 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1192 /* print info */ 1193 if (pcbddc->dbg_flag) { 1194 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1195 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1196 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1197 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1198 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1199 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1200 } 1201 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1202 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1203 } 1204 1205 /* Neumann */ 1206 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1207 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1208 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1209 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1210 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1211 /* need to be adapted? */ 1212 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1213 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1214 /* print info */ 1215 if (pcbddc->dbg_flag) { 1216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1217 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1218 } 1219 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1220 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1221 } 1222 } 1223 /* free Neumann problem's matrix */ 1224 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1230 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1231 { 1232 PetscErrorCode ierr; 1233 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1234 1235 PetscFunctionBegin; 1236 if (applytranspose) { 1237 if (pcbddc->local_auxmat1) { 1238 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1239 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1240 } 1241 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1242 } else { 1243 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1244 if (pcbddc->local_auxmat1) { 1245 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1246 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1247 } 1248 } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1255 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1256 { 1257 PetscErrorCode ierr; 1258 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1259 PC_IS* pcis = (PC_IS*) (pc->data); 1260 const PetscScalar zero = 0.0; 1261 1262 PetscFunctionBegin; 1263 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1264 if (applytranspose) { 1265 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1266 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1267 } else { 1268 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1269 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1270 } 1271 /* start communications from local primal nodes to rhs of coarse solver */ 1272 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1273 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1274 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1275 1276 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1277 /* TODO remove null space when doing multilevel */ 1278 if (pcbddc->coarse_ksp) { 1279 if (applytranspose) { 1280 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1281 } else { 1282 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1283 } 1284 } 1285 /* start communications from coarse solver solution to local primal nodes */ 1286 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1287 1288 /* Local solution on R nodes */ 1289 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1290 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1291 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1292 if (pcbddc->switch_static) { 1293 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1294 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1295 } 1296 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1297 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1298 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1299 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1300 if (pcbddc->switch_static) { 1301 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1302 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1303 } 1304 1305 /* complete communications from coarse sol to local primal nodes */ 1306 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1307 1308 /* Sum contributions from two levels */ 1309 if (applytranspose) { 1310 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1311 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1312 } else { 1313 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1314 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1315 } 1316 PetscFunctionReturn(0); 1317 } 1318 1319 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1322 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1323 { 1324 PetscErrorCode ierr; 1325 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1326 PetscScalar *array,*array2; 1327 Vec from,to; 1328 1329 PetscFunctionBegin; 1330 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1331 from = pcbddc->coarse_vec; 1332 to = pcbddc->vec1_P; 1333 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1334 Vec tvec; 1335 PetscInt lsize; 1336 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1337 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1338 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1339 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1340 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1341 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1342 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1343 } 1344 } else { /* from local to global -> put data in coarse right hand side */ 1345 from = pcbddc->vec1_P; 1346 to = pcbddc->coarse_vec; 1347 } 1348 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1354 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1355 { 1356 PetscErrorCode ierr; 1357 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1358 PetscScalar *array,*array2; 1359 Vec from,to; 1360 1361 PetscFunctionBegin; 1362 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1363 from = pcbddc->coarse_vec; 1364 to = pcbddc->vec1_P; 1365 } else { /* from local to global -> put data in coarse right hand side */ 1366 from = pcbddc->vec1_P; 1367 to = pcbddc->coarse_vec; 1368 } 1369 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1370 if (smode == SCATTER_FORWARD) { 1371 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1372 Vec tvec; 1373 PetscInt lsize; 1374 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1375 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1376 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1377 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1378 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1379 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1380 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1381 } 1382 } 1383 PetscFunctionReturn(0); 1384 } 1385 1386 /* uncomment for testing purposes */ 1387 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1390 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1391 { 1392 PetscErrorCode ierr; 1393 PC_IS* pcis = (PC_IS*)(pc->data); 1394 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1395 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1396 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1397 MatType impMatType=MATSEQAIJ; 1398 /* one and zero */ 1399 PetscScalar one=1.0,zero=0.0; 1400 /* space to store constraints and their local indices */ 1401 PetscScalar *temp_quadrature_constraint; 1402 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1403 /* iterators */ 1404 PetscInt i,j,k,total_counts,temp_start_ptr; 1405 /* stuff to store connected components stored in pcbddc->mat_graph */ 1406 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1407 PetscInt n_ISForFaces,n_ISForEdges; 1408 /* near null space stuff */ 1409 MatNullSpace nearnullsp; 1410 const Vec *nearnullvecs; 1411 Vec *localnearnullsp; 1412 PetscBool nnsp_has_cnst; 1413 PetscInt nnsp_size; 1414 PetscScalar *array; 1415 /* BLAS integers */ 1416 PetscBLASInt lwork,lierr; 1417 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1418 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1419 /* LAPACK working arrays for SVD or POD */ 1420 PetscBool skip_lapack; 1421 PetscScalar *work; 1422 PetscReal *singular_vals; 1423 #if defined(PETSC_USE_COMPLEX) 1424 PetscReal *rwork; 1425 #endif 1426 #if defined(PETSC_MISSING_LAPACK_GESVD) 1427 PetscBLASInt Blas_one_2=1; 1428 PetscScalar *temp_basis,*correlation_mat; 1429 #else 1430 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1431 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1432 #endif 1433 /* reuse */ 1434 PetscInt olocal_primal_size; 1435 PetscInt *oprimal_indices_local_idxs; 1436 /* change of basis */ 1437 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1438 PetscBool boolforchange,qr_needed; 1439 PetscBT touched,change_basis,qr_needed_idx; 1440 /* auxiliary stuff */ 1441 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1442 /* some quantities */ 1443 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1444 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1445 1446 1447 PetscFunctionBegin; 1448 /* Destroy Mat objects computed previously */ 1449 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1450 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1451 /* Get index sets for faces, edges and vertices from graph */ 1452 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1453 /* free unneeded index sets */ 1454 if (!pcbddc->use_vertices) { 1455 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1456 } 1457 if (!pcbddc->use_edges) { 1458 for (i=0;i<n_ISForEdges;i++) { 1459 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1460 } 1461 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1462 n_ISForEdges = 0; 1463 } 1464 if (!pcbddc->use_faces) { 1465 for (i=0;i<n_ISForFaces;i++) { 1466 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1467 } 1468 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1469 n_ISForFaces = 0; 1470 } 1471 /* HACKS (the following two blocks of code) */ 1472 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1473 pcbddc->use_change_of_basis = PETSC_TRUE; 1474 if (!ISForEdges) { 1475 pcbddc->use_change_on_faces = PETSC_TRUE; 1476 } 1477 } 1478 if (pcbddc->NullSpace) { 1479 /* use_change_of_basis should be consistent among processors */ 1480 PetscBool tbool[2],gbool[2]; 1481 tbool [0] = pcbddc->use_change_of_basis; 1482 tbool [1] = pcbddc->use_change_on_faces; 1483 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1484 pcbddc->use_change_of_basis = gbool[0]; 1485 pcbddc->use_change_on_faces = gbool[1]; 1486 } 1487 /* print some info */ 1488 if (pcbddc->dbg_flag) { 1489 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1490 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1491 i = 0; 1492 if (ISForVertices) { 1493 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1494 } 1495 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1497 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1498 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1499 } 1500 /* check if near null space is attached to global mat */ 1501 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1502 if (nearnullsp) { 1503 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1504 /* remove any stored info */ 1505 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1506 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1507 /* store information for BDDC solver reuse */ 1508 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1509 pcbddc->onearnullspace = nearnullsp; 1510 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1511 for (i=0;i<nnsp_size;i++) { 1512 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1513 } 1514 } else { /* if near null space is not provided BDDC uses constants by default */ 1515 nnsp_size = 0; 1516 nnsp_has_cnst = PETSC_TRUE; 1517 } 1518 /* get max number of constraints on a single cc */ 1519 max_constraints = nnsp_size; 1520 if (nnsp_has_cnst) max_constraints++; 1521 1522 /* 1523 Evaluate maximum storage size needed by the procedure 1524 - temp_indices will contain start index of each constraint stored as follows 1525 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1526 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1527 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1528 */ 1529 total_counts = n_ISForFaces+n_ISForEdges; 1530 total_counts *= max_constraints; 1531 n_vertices = 0; 1532 if (ISForVertices) { 1533 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1534 } 1535 total_counts += n_vertices; 1536 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1537 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1538 total_counts = 0; 1539 max_size_of_constraint = 0; 1540 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1541 if (i<n_ISForEdges) { 1542 used_IS = &ISForEdges[i]; 1543 } else { 1544 used_IS = &ISForFaces[i-n_ISForEdges]; 1545 } 1546 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1547 total_counts += j; 1548 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1549 } 1550 total_counts *= max_constraints; 1551 total_counts += n_vertices; 1552 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1553 /* get local part of global near null space vectors */ 1554 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1555 for (k=0;k<nnsp_size;k++) { 1556 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1557 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1559 } 1560 1561 /* whether or not to skip lapack calls */ 1562 skip_lapack = PETSC_TRUE; 1563 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1564 1565 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1566 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1567 PetscScalar temp_work; 1568 #if defined(PETSC_MISSING_LAPACK_GESVD) 1569 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1570 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1571 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1572 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1573 #if defined(PETSC_USE_COMPLEX) 1574 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1575 #endif 1576 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1577 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1578 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1579 lwork = -1; 1580 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1581 #if !defined(PETSC_USE_COMPLEX) 1582 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1583 #else 1584 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1585 #endif 1586 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1587 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1588 #else /* on missing GESVD */ 1589 /* SVD */ 1590 PetscInt max_n,min_n; 1591 max_n = max_size_of_constraint; 1592 min_n = max_constraints; 1593 if (max_size_of_constraint < max_constraints) { 1594 min_n = max_size_of_constraint; 1595 max_n = max_constraints; 1596 } 1597 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1598 #if defined(PETSC_USE_COMPLEX) 1599 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1600 #endif 1601 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1602 lwork = -1; 1603 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1604 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1605 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1606 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1607 #if !defined(PETSC_USE_COMPLEX) 1608 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1609 #else 1610 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1611 #endif 1612 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1613 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1614 #endif /* on missing GESVD */ 1615 /* Allocate optimal workspace */ 1616 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1617 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1618 } 1619 /* Now we can loop on constraining sets */ 1620 total_counts = 0; 1621 temp_indices[0] = 0; 1622 /* vertices */ 1623 if (ISForVertices) { 1624 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1625 if (nnsp_has_cnst) { /* consider all vertices */ 1626 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1627 for (i=0;i<n_vertices;i++) { 1628 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1629 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1630 total_counts++; 1631 } 1632 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1633 PetscBool used_vertex; 1634 for (i=0;i<n_vertices;i++) { 1635 used_vertex = PETSC_FALSE; 1636 k = 0; 1637 while (!used_vertex && k<nnsp_size) { 1638 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1639 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1640 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1641 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1642 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1643 total_counts++; 1644 used_vertex = PETSC_TRUE; 1645 } 1646 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1647 k++; 1648 } 1649 } 1650 } 1651 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1652 n_vertices = total_counts; 1653 } 1654 1655 /* edges and faces */ 1656 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1657 if (i<n_ISForEdges) { 1658 used_IS = &ISForEdges[i]; 1659 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1660 } else { 1661 used_IS = &ISForFaces[i-n_ISForEdges]; 1662 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1663 } 1664 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1665 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1666 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1667 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1668 /* change of basis should not be performed on local periodic nodes */ 1669 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1670 if (nnsp_has_cnst) { 1671 PetscScalar quad_value; 1672 temp_constraints++; 1673 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1674 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1675 for (j=0;j<size_of_constraint;j++) { 1676 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1677 } 1678 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1679 total_counts++; 1680 } 1681 for (k=0;k<nnsp_size;k++) { 1682 PetscReal real_value; 1683 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1684 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1685 for (j=0;j<size_of_constraint;j++) { 1686 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1687 } 1688 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1689 /* check if array is null on the connected component */ 1690 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1691 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1692 if (real_value > 0.0) { /* keep indices and values */ 1693 temp_constraints++; 1694 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1695 total_counts++; 1696 } 1697 } 1698 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1699 valid_constraints = temp_constraints; 1700 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1701 if (!pcbddc->use_nnsp_true && temp_constraints) { 1702 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1703 1704 #if defined(PETSC_MISSING_LAPACK_GESVD) 1705 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1706 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1707 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1708 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1709 from that computed using LAPACKgesvd 1710 -> This is due to a different computation of eigenvectors in LAPACKheev 1711 -> The quality of the POD-computed basis will be the same */ 1712 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1713 /* Store upper triangular part of correlation matrix */ 1714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1715 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1716 for (j=0;j<temp_constraints;j++) { 1717 for (k=0;k<j+1;k++) { 1718 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1719 } 1720 } 1721 /* compute eigenvalues and eigenvectors of correlation matrix */ 1722 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1723 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1724 #if !defined(PETSC_USE_COMPLEX) 1725 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1726 #else 1727 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1728 #endif 1729 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1730 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1731 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1732 j=0; 1733 while (j < temp_constraints && singular_vals[j] < tol) j++; 1734 total_counts=total_counts-j; 1735 valid_constraints = temp_constraints-j; 1736 /* scale and copy POD basis into used quadrature memory */ 1737 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1738 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1739 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1740 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1741 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1742 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1743 if (j<temp_constraints) { 1744 PetscInt ii; 1745 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1746 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1747 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1748 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1749 for (k=0;k<temp_constraints-j;k++) { 1750 for (ii=0;ii<size_of_constraint;ii++) { 1751 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1752 } 1753 } 1754 } 1755 #else /* on missing GESVD */ 1756 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1757 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1758 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1759 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1760 #if !defined(PETSC_USE_COMPLEX) 1761 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1762 #else 1763 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1764 #endif 1765 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1766 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1767 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1768 k = temp_constraints; 1769 if (k > size_of_constraint) k = size_of_constraint; 1770 j = 0; 1771 while (j < k && singular_vals[k-j-1] < tol) j++; 1772 total_counts = total_counts-temp_constraints+k-j; 1773 valid_constraints = k-j; 1774 #endif /* on missing GESVD */ 1775 } 1776 /* setting change_of_basis flag is safe now */ 1777 if (boolforchange) { 1778 for (j=0;j<valid_constraints;j++) { 1779 PetscBTSet(change_basis,total_counts-j-1); 1780 } 1781 } 1782 } 1783 /* free index sets of faces, edges and vertices */ 1784 for (i=0;i<n_ISForFaces;i++) { 1785 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1786 } 1787 if (n_ISForFaces) { 1788 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1789 } 1790 for (i=0;i<n_ISForEdges;i++) { 1791 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1792 } 1793 if (n_ISForEdges) { 1794 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1795 } 1796 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1797 /* map temp_indices_to_constraint in boundary numbering */ 1798 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1799 if (i != temp_indices[total_counts]) { 1800 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1801 } 1802 1803 /* free workspace */ 1804 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1805 ierr = PetscFree(work);CHKERRQ(ierr); 1806 #if defined(PETSC_USE_COMPLEX) 1807 ierr = PetscFree(rwork);CHKERRQ(ierr); 1808 #endif 1809 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1810 #if defined(PETSC_MISSING_LAPACK_GESVD) 1811 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1812 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1813 #endif 1814 } 1815 for (k=0;k<nnsp_size;k++) { 1816 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1817 } 1818 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1819 1820 /* set quantities in pcbddc data structure and store previous primal size */ 1821 /* n_vertices defines the number of subdomain corners in the primal space */ 1822 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1823 olocal_primal_size = pcbddc->local_primal_size; 1824 pcbddc->local_primal_size = total_counts; 1825 pcbddc->n_vertices = n_vertices; 1826 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1827 1828 /* Create constraint matrix */ 1829 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1830 /* so we need to set it up properly either with or without change of basis */ 1831 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1832 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1833 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1834 /* array to compute a local numbering of constraints : vertices first then constraints */ 1835 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1836 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1837 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1838 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1839 /* auxiliary stuff for basis change */ 1840 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1841 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1842 1843 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1844 total_primal_vertices=0; 1845 for (i=0;i<pcbddc->local_primal_size;i++) { 1846 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1847 if (size_of_constraint == 1) { 1848 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1849 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1850 aux_primal_minloc[total_primal_vertices]=0; 1851 total_primal_vertices++; 1852 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1853 PetscInt min_loc,min_index; 1854 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1855 /* find first untouched local node */ 1856 k = 0; 1857 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1858 min_index = global_indices[k]; 1859 min_loc = k; 1860 /* search the minimum among global nodes already untouched on the cc */ 1861 for (k=1;k<size_of_constraint;k++) { 1862 /* there can be more than one constraint on a single connected component */ 1863 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1864 min_index = global_indices[k]; 1865 min_loc = k; 1866 } 1867 } 1868 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1869 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1870 aux_primal_minloc[total_primal_vertices]=min_loc; 1871 total_primal_vertices++; 1872 } 1873 } 1874 /* determine if a QR strategy is needed for change of basis */ 1875 qr_needed = PETSC_FALSE; 1876 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1877 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1878 if (PetscBTLookup(change_basis,i)) { 1879 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1880 j = 0; 1881 for (k=0;k<size_of_constraint;k++) { 1882 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1883 j++; 1884 } 1885 } 1886 /* found more than one primal dof on the cc */ 1887 if (j > 1) { 1888 PetscBTSet(qr_needed_idx,i); 1889 qr_needed = PETSC_TRUE; 1890 } 1891 } 1892 } 1893 /* free workspace */ 1894 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1895 1896 /* permute indices in order to have a sorted set of vertices */ 1897 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1898 1899 /* nonzero structure of constraint matrix */ 1900 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1901 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1902 j=total_primal_vertices; 1903 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1904 if (!PetscBTLookup(change_basis,i)) { 1905 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1906 j++; 1907 } 1908 } 1909 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1910 ierr = PetscFree(nnz);CHKERRQ(ierr); 1911 /* set values in constraint matrix */ 1912 for (i=0;i<total_primal_vertices;i++) { 1913 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1914 } 1915 total_counts = total_primal_vertices; 1916 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1917 if (!PetscBTLookup(change_basis,i)) { 1918 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1919 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1920 total_counts++; 1921 } 1922 } 1923 /* assembling */ 1924 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1925 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1926 /* 1927 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1928 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1929 */ 1930 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1931 if (pcbddc->use_change_of_basis) { 1932 /* dual and primal dofs on a single cc */ 1933 PetscInt dual_dofs,primal_dofs; 1934 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1935 PetscInt primal_counter; 1936 /* working stuff for GEQRF */ 1937 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1938 PetscBLASInt lqr_work; 1939 /* working stuff for UNGQR */ 1940 PetscScalar *gqr_work,lgqr_work_t; 1941 PetscBLASInt lgqr_work; 1942 /* working stuff for TRTRS */ 1943 PetscScalar *trs_rhs; 1944 PetscBLASInt Blas_NRHS; 1945 /* pointers for values insertion into change of basis matrix */ 1946 PetscInt *start_rows,*start_cols; 1947 PetscScalar *start_vals; 1948 /* working stuff for values insertion */ 1949 PetscBT is_primal; 1950 /* matrix sizes */ 1951 PetscInt global_size,local_size; 1952 /* work array for nonzeros */ 1953 PetscScalar *nnz_array; 1954 /* temporary change of basis */ 1955 Mat localChangeOfBasisMatrix; 1956 /* auxiliary work for global change of basis */ 1957 Vec nnz_vec; 1958 PetscInt *idxs_I,*idxs_B,*idxs_all; 1959 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 1960 PetscScalar *vals; 1961 PetscBool done; 1962 1963 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 1964 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 1965 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1966 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1967 1968 /* nonzeros for local mat */ 1969 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1970 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1971 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1972 if (PetscBTLookup(change_basis,i)) { 1973 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1974 if (PetscBTLookup(qr_needed_idx,i)) { 1975 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1976 } else { 1977 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1978 /* get local primal index on the cc */ 1979 j = 0; 1980 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1981 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1982 } 1983 } 1984 } 1985 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1986 /* Set initial identity in the matrix */ 1987 for (i=0;i<pcis->n_B;i++) { 1988 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1989 } 1990 1991 if (pcbddc->dbg_flag) { 1992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1994 } 1995 1996 1997 /* Now we loop on the constraints which need a change of basis */ 1998 /* 1999 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2000 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2001 2002 Basic blocks of change of basis matrix T computed 2003 2004 - Using the following block transformation if there is only a primal dof on the cc 2005 (in the example, primal dof is the last one of the edge in LOCAL ordering 2006 in this code, primal dof is the first one of the edge in GLOBAL ordering) 2007 | 1 0 ... 0 1 | 2008 | 0 1 ... 0 1 | 2009 | ... | 2010 | 0 ... 1 1 | 2011 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 2012 2013 - via QR decomposition of constraints otherwise 2014 */ 2015 if (qr_needed) { 2016 /* space to store Q */ 2017 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 2018 /* first we issue queries for optimal work */ 2019 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2020 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2021 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2022 lqr_work = -1; 2023 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2024 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2025 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2026 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2027 lgqr_work = -1; 2028 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2029 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2030 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2031 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2032 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2033 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2034 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2035 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2036 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2037 /* array to store scaling factors for reflectors */ 2038 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2039 /* array to store rhs and solution of triangular solver */ 2040 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2041 /* allocating workspace for check */ 2042 if (pcbddc->dbg_flag) { 2043 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2044 } 2045 } 2046 /* array to store whether a node is primal or not */ 2047 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2048 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2049 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2050 if (i != total_primal_vertices) { 2051 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2052 } 2053 for (i=0;i<total_primal_vertices;i++) { 2054 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2055 } 2056 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2057 2058 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2059 /* -> using implicit ordering contained in temp_indices data */ 2060 total_counts = pcbddc->n_vertices; 2061 primal_counter = total_counts; 2062 while (total_counts<pcbddc->local_primal_size) { 2063 primal_dofs = 1; 2064 if (PetscBTLookup(change_basis,total_counts)) { 2065 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2066 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2067 primal_dofs++; 2068 } 2069 /* get constraint info */ 2070 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2071 dual_dofs = size_of_constraint-primal_dofs; 2072 2073 if (pcbddc->dbg_flag) { 2074 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2075 } 2076 2077 if (primal_dofs > 1) { /* QR */ 2078 2079 /* copy quadrature constraints for change of basis check */ 2080 if (pcbddc->dbg_flag) { 2081 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2082 } 2083 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2084 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2085 2086 /* compute QR decomposition of constraints */ 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2090 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2091 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2092 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2093 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2094 2095 /* explictly compute R^-T */ 2096 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2097 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2098 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2099 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2100 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2101 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2102 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2103 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2104 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2105 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2106 2107 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2108 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2109 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2110 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2111 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2112 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2113 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2114 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2115 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2116 2117 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2118 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2119 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2120 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2121 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2122 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2124 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2126 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2127 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2128 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2129 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2130 2131 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2132 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2133 /* insert cols for primal dofs */ 2134 for (j=0;j<primal_dofs;j++) { 2135 start_vals = &qr_basis[j*size_of_constraint]; 2136 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2137 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2138 } 2139 /* insert cols for dual dofs */ 2140 for (j=0,k=0;j<dual_dofs;k++) { 2141 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2142 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2143 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2144 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2145 j++; 2146 } 2147 } 2148 2149 /* check change of basis */ 2150 if (pcbddc->dbg_flag) { 2151 PetscInt ii,jj; 2152 PetscBool valid_qr=PETSC_TRUE; 2153 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2154 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2155 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2156 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2157 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2158 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2159 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2160 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2161 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2162 for (jj=0;jj<size_of_constraint;jj++) { 2163 for (ii=0;ii<primal_dofs;ii++) { 2164 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2165 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2166 } 2167 } 2168 if (!valid_qr) { 2169 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2170 for (jj=0;jj<size_of_constraint;jj++) { 2171 for (ii=0;ii<primal_dofs;ii++) { 2172 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2173 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2174 } 2175 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2176 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2177 } 2178 } 2179 } 2180 } else { 2181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2182 } 2183 } 2184 } else { /* simple transformation block */ 2185 PetscInt row,col; 2186 PetscScalar val; 2187 for (j=0;j<size_of_constraint;j++) { 2188 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2189 if (!PetscBTLookup(is_primal,row)) { 2190 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2191 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2192 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2193 } else { 2194 for (k=0;k<size_of_constraint;k++) { 2195 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2196 if (row != col) { 2197 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2198 } else { 2199 val = 1.0; 2200 } 2201 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2202 } 2203 } 2204 } 2205 if (pcbddc->dbg_flag) { 2206 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2207 } 2208 } 2209 /* increment primal counter */ 2210 primal_counter += primal_dofs; 2211 } else { 2212 if (pcbddc->dbg_flag) { 2213 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2214 } 2215 } 2216 /* increment constraint counter total_counts */ 2217 total_counts += primal_dofs; 2218 } 2219 2220 /* free workspace */ 2221 if (qr_needed) { 2222 if (pcbddc->dbg_flag) { 2223 ierr = PetscFree(work);CHKERRQ(ierr); 2224 } 2225 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2226 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2227 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2228 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2229 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2230 } 2231 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2232 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2233 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2234 2235 /* assembling of global change of variable */ 2236 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2237 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2238 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2239 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2240 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2241 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2242 2243 /* nonzeros (overestimated) */ 2244 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2245 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2246 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2247 for (i=0;i<pcis->n;i++) { 2248 nnz_array[i] = 1.0; 2249 idxs_all[i] = i; 2250 } 2251 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2252 for (i=0;i<pcis->n_B;i++) { 2253 nnz_array[idxs_B[i]] = nnz[i]; 2254 } 2255 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2256 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2257 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz);CHKERRQ(ierr); 2259 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2260 ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr); 2261 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2262 for (i=0;i<local_size;i++) { 2263 nnz[i] = (PetscInt)(PetscRealPart(nnz_array[i])); 2264 } 2265 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2266 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2267 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz,0,nnz);CHKERRQ(ierr); 2268 ierr = PetscFree(nnz);CHKERRQ(ierr); 2269 2270 /* Set identity on dirichlet dofs */ 2271 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2272 for (i=0;i<pcis->n-pcis->n_B;i++) { 2273 PetscScalar one=1.0; 2274 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2275 } 2276 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2277 2278 /* Set values at interface dofs */ 2279 done = PETSC_TRUE; 2280 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2281 if (!done) { 2282 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2283 } 2284 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2285 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2286 ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2287 for (i=0;i<nvtxs;i++) { 2288 PetscInt row,*cols,ncols; 2289 PetscScalar *mat_vals; 2290 2291 row = idxs_B[i]; 2292 ncols = xadj[i+1]-xadj[i]; 2293 cols = idxs_mapped+xadj[i]; 2294 mat_vals = vals+xadj[i]; 2295 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2296 } 2297 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2298 if (!done) { 2299 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2300 } 2301 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2302 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2303 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2304 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2306 2307 /* check */ 2308 if (pcbddc->dbg_flag) { 2309 PetscReal error; 2310 Vec x,x_change; 2311 2312 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2313 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2314 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2315 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2316 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2317 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2318 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2319 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2320 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2321 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2322 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2323 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2324 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2325 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2326 ierr = VecDestroy(&x);CHKERRQ(ierr); 2327 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2328 } 2329 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2330 2331 /* 2332 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2333 ierr = MatView(local_ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2334 */ 2335 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2336 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2337 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2338 } 2339 2340 /* set up change of basis context */ 2341 if (pcbddc->ChangeOfBasisMatrix) { 2342 PCBDDCChange_ctx change_ctx; 2343 2344 if (!pcbddc->new_global_mat) { 2345 PetscInt global_size,local_size; 2346 2347 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2348 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2349 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2350 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2351 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2352 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2353 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2354 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2355 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2356 } else { 2357 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2358 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2359 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2360 } 2361 if (!pcbddc->user_ChangeOfBasisMatrix) { 2362 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2363 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2364 } else { 2365 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2366 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2367 } 2368 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2369 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2370 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2372 } 2373 2374 /* get indices in local ordering for vertices and constraints */ 2375 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2376 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2377 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2378 } 2379 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2380 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2381 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2382 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2383 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2384 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2385 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2386 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2387 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2388 /* set quantities in PCBDDC data struct */ 2389 pcbddc->n_actual_vertices = i; 2390 /* check if a new primal space has been introduced */ 2391 pcbddc->new_primal_space_local = PETSC_TRUE; 2392 if (olocal_primal_size == pcbddc->local_primal_size) { 2393 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2394 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2395 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2396 } 2397 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2398 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2399 2400 /* flush dbg viewer */ 2401 if (pcbddc->dbg_flag) { 2402 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2403 } 2404 2405 /* free workspace */ 2406 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2407 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2408 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2409 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2410 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2411 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2417 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2418 { 2419 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2420 PC_IS *pcis = (PC_IS*)pc->data; 2421 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2422 PetscInt ierr,i,vertex_size; 2423 PetscViewer viewer=pcbddc->dbg_viewer; 2424 2425 PetscFunctionBegin; 2426 /* Reset previously computed graph */ 2427 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2428 /* Init local Graph struct */ 2429 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2430 2431 /* Check validity of the csr graph passed in by the user */ 2432 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2433 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2434 } 2435 2436 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2437 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2438 Mat mat_adj; 2439 const PetscInt *xadj,*adjncy; 2440 PetscBool flg_row=PETSC_TRUE; 2441 2442 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2443 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2444 if (!flg_row) { 2445 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2446 } 2447 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2448 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2449 if (!flg_row) { 2450 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2451 } 2452 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2453 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2454 } 2455 2456 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2457 vertex_size = 1; 2458 if (pcbddc->user_provided_isfordofs) { 2459 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2460 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2461 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2462 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2463 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2464 } 2465 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2466 pcbddc->n_ISForDofs = 0; 2467 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2468 } 2469 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2470 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2471 } else { 2472 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2473 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2474 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2475 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2476 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2477 } 2478 } 2479 } 2480 2481 /* Setup of Graph */ 2482 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2483 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2484 } 2485 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2486 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2487 } 2488 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2489 2490 /* Graph's connected components analysis */ 2491 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2492 2493 /* print some info to stdout */ 2494 if (pcbddc->dbg_flag) { 2495 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2496 } 2497 2498 /* mark topography has done */ 2499 pcbddc->recompute_topography = PETSC_FALSE; 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2505 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2506 { 2507 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2508 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2509 PetscErrorCode ierr; 2510 2511 PetscFunctionBegin; 2512 n = 0; 2513 vertices = 0; 2514 if (pcbddc->ConstraintMatrix) { 2515 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2516 for (i=0;i<local_primal_size;i++) { 2517 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2518 if (size_of_constraint == 1) n++; 2519 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2520 } 2521 if (vertices_idx) { 2522 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2523 n = 0; 2524 for (i=0;i<local_primal_size;i++) { 2525 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2526 if (size_of_constraint == 1) { 2527 vertices[n++]=row_cmat_indices[0]; 2528 } 2529 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2530 } 2531 } 2532 } 2533 *n_vertices = n; 2534 if (vertices_idx) *vertices_idx = vertices; 2535 PetscFunctionReturn(0); 2536 } 2537 2538 #undef __FUNCT__ 2539 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2540 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2541 { 2542 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2543 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2544 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2545 PetscBT touched; 2546 PetscErrorCode ierr; 2547 2548 /* This function assumes that the number of local constraints per connected component 2549 is not greater than the number of nodes defined for the connected component 2550 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2551 PetscFunctionBegin; 2552 n = 0; 2553 constraints_index = 0; 2554 if (pcbddc->ConstraintMatrix) { 2555 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2556 max_size_of_constraint = 0; 2557 for (i=0;i<local_primal_size;i++) { 2558 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2559 if (size_of_constraint > 1) { 2560 n++; 2561 } 2562 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2563 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2564 } 2565 if (constraints_idx) { 2566 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2567 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2568 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2569 n = 0; 2570 for (i=0;i<local_primal_size;i++) { 2571 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2572 if (size_of_constraint > 1) { 2573 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2574 /* find first untouched local node */ 2575 j = 0; 2576 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2577 min_index = row_cmat_global_indices[j]; 2578 min_loc = j; 2579 /* search the minimum among nodes not yet touched on the connected component 2580 since there can be more than one constraint on a single cc */ 2581 for (j=1;j<size_of_constraint;j++) { 2582 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2583 min_index = row_cmat_global_indices[j]; 2584 min_loc = j; 2585 } 2586 } 2587 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2588 constraints_index[n++] = row_cmat_indices[min_loc]; 2589 } 2590 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2591 } 2592 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2593 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2594 } 2595 } 2596 *n_constraints = n; 2597 if (constraints_idx) *constraints_idx = constraints_index; 2598 PetscFunctionReturn(0); 2599 } 2600 2601 /* the next two functions has been adapted from pcis.c */ 2602 #undef __FUNCT__ 2603 #define __FUNCT__ "PCBDDCApplySchur" 2604 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2605 { 2606 PetscErrorCode ierr; 2607 PC_IS *pcis = (PC_IS*)(pc->data); 2608 2609 PetscFunctionBegin; 2610 if (!vec2_B) { vec2_B = v; } 2611 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2612 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2613 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2614 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2615 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2616 PetscFunctionReturn(0); 2617 } 2618 2619 #undef __FUNCT__ 2620 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2621 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2622 { 2623 PetscErrorCode ierr; 2624 PC_IS *pcis = (PC_IS*)(pc->data); 2625 2626 PetscFunctionBegin; 2627 if (!vec2_B) { vec2_B = v; } 2628 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2629 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2630 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2631 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2632 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2633 PetscFunctionReturn(0); 2634 } 2635 2636 #undef __FUNCT__ 2637 #define __FUNCT__ "PCBDDCSubsetNumbering" 2638 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2639 { 2640 Vec local_vec,global_vec; 2641 IS seqis,paris; 2642 VecScatter scatter_ctx; 2643 PetscScalar *array; 2644 PetscInt *temp_global_dofs; 2645 PetscScalar globalsum; 2646 PetscInt i,j,s; 2647 PetscInt nlocals,first_index,old_index,max_local; 2648 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2649 PetscMPIInt *dof_sizes,*dof_displs; 2650 PetscBool first_found; 2651 PetscErrorCode ierr; 2652 2653 PetscFunctionBegin; 2654 /* mpi buffers */ 2655 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2656 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2657 j = ( !rank_prec_comm ? size_prec_comm : 0); 2658 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2659 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2660 /* get maximum size of subset */ 2661 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2662 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2663 max_local = 0; 2664 for (i=0;i<n_local_dofs;i++) { 2665 if (max_local < temp_global_dofs[i] ) { 2666 max_local = temp_global_dofs[i]; 2667 } 2668 } 2669 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2670 max_global++; 2671 max_local = 0; 2672 for (i=0;i<n_local_dofs;i++) { 2673 if (max_local < local_dofs[i] ) { 2674 max_local = local_dofs[i]; 2675 } 2676 } 2677 max_local++; 2678 /* allocate workspace */ 2679 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2680 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2681 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2682 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2683 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2684 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2685 /* create scatter */ 2686 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2687 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2688 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2689 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2690 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2691 /* init array */ 2692 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2693 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2694 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2695 if (local_dofs_mult) { 2696 for (i=0;i<n_local_dofs;i++) { 2697 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2698 } 2699 } else { 2700 for (i=0;i<n_local_dofs;i++) { 2701 array[local_dofs[i]]=1.0; 2702 } 2703 } 2704 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2705 /* scatter into global vec and get total number of global dofs */ 2706 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2707 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2708 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2709 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2710 /* Fill global_vec with cumulative function for global numbering */ 2711 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2712 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2713 nlocals = 0; 2714 first_index = -1; 2715 first_found = PETSC_FALSE; 2716 for (i=0;i<s;i++) { 2717 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2718 first_found = PETSC_TRUE; 2719 first_index = i; 2720 } 2721 nlocals += (PetscInt)PetscRealPart(array[i]); 2722 } 2723 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2724 if (!rank_prec_comm) { 2725 dof_displs[0]=0; 2726 for (i=1;i<size_prec_comm;i++) { 2727 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2728 } 2729 } 2730 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2731 if (first_found) { 2732 array[first_index] += (PetscScalar)nlocals; 2733 old_index = first_index; 2734 for (i=first_index+1;i<s;i++) { 2735 if (PetscRealPart(array[i]) > 0.1) { 2736 array[i] += array[old_index]; 2737 old_index = i; 2738 } 2739 } 2740 } 2741 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2742 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2743 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2744 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2745 /* get global ordering of local dofs */ 2746 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2747 if (local_dofs_mult) { 2748 for (i=0;i<n_local_dofs;i++) { 2749 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2750 } 2751 } else { 2752 for (i=0;i<n_local_dofs;i++) { 2753 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2754 } 2755 } 2756 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2757 /* free workspace */ 2758 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2759 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2760 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2761 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2762 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2763 /* return pointer to global ordering of local dofs */ 2764 *global_numbering_subset = temp_global_dofs; 2765 PetscFunctionReturn(0); 2766 } 2767 2768 #undef __FUNCT__ 2769 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2770 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2771 { 2772 PetscInt i,j; 2773 PetscScalar *alphas; 2774 PetscErrorCode ierr; 2775 2776 PetscFunctionBegin; 2777 /* this implements stabilized Gram-Schmidt */ 2778 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2779 for (i=0;i<n;i++) { 2780 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2781 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2782 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2783 } 2784 ierr = PetscFree(alphas);CHKERRQ(ierr); 2785 PetscFunctionReturn(0); 2786 } 2787 2788 #undef __FUNCT__ 2789 #define __FUNCT__ "MatISGetSubassemblingPattern" 2790 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2791 { 2792 Mat subdomain_adj; 2793 IS new_ranks,ranks_send_to; 2794 MatPartitioning partitioner; 2795 Mat_IS *matis; 2796 PetscInt n_neighs,*neighs,*n_shared,**shared; 2797 PetscInt prank; 2798 PetscMPIInt size,rank,color; 2799 PetscInt *xadj,*adjncy,*oldranks; 2800 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2801 PetscInt i,j,local_size,threshold=0; 2802 PetscErrorCode ierr; 2803 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2804 PetscSubcomm subcomm; 2805 2806 PetscFunctionBegin; 2807 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2808 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2809 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2810 2811 /* Get info on mapping */ 2812 matis = (Mat_IS*)(mat->data); 2813 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2814 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2815 2816 /* build local CSR graph of subdomains' connectivity */ 2817 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2818 xadj[0] = 0; 2819 xadj[1] = PetscMax(n_neighs-1,0); 2820 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2821 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2822 2823 if (threshold) { 2824 PetscInt* count,min_threshold; 2825 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2826 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2827 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2828 for (j=0;j<n_shared[i];j++) { 2829 count[shared[i][j]] += 1; 2830 } 2831 } 2832 /* adapt threshold since we dont want to lose significant connections */ 2833 min_threshold = n_neighs; 2834 for (i=1;i<n_neighs;i++) { 2835 for (j=0;j<n_shared[i];j++) { 2836 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2837 } 2838 } 2839 threshold = PetscMax(min_threshold+1,threshold); 2840 xadj[1] = 0; 2841 for (i=1;i<n_neighs;i++) { 2842 for (j=0;j<n_shared[i];j++) { 2843 if (count[shared[i][j]] < threshold) { 2844 adjncy[xadj[1]] = neighs[i]; 2845 adjncy_wgt[xadj[1]] = n_shared[i]; 2846 xadj[1]++; 2847 break; 2848 } 2849 } 2850 } 2851 ierr = PetscFree(count);CHKERRQ(ierr); 2852 } else { 2853 if (xadj[1]) { 2854 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2855 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2856 } 2857 } 2858 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2859 if (use_square) { 2860 for (i=0;i<xadj[1];i++) { 2861 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2862 } 2863 } 2864 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2865 2866 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2867 2868 /* 2869 Restrict work on active processes only. 2870 */ 2871 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2872 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2873 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2874 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2875 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2876 if (color) { 2877 ierr = PetscFree(xadj);CHKERRQ(ierr); 2878 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2879 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2880 } else { 2881 PetscInt coarsening_ratio; 2882 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2883 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2884 prank = rank; 2885 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2886 /* 2887 for (i=0;i<size;i++) { 2888 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2889 } 2890 */ 2891 for (i=0;i<xadj[1];i++) { 2892 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2893 } 2894 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2895 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2896 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2897 2898 /* Partition */ 2899 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2900 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2901 if (use_vwgt) { 2902 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2903 v_wgt[0] = local_size; 2904 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2905 } 2906 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2907 coarsening_ratio = size/n_subdomains; 2908 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2909 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2910 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2911 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2912 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2913 2914 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2915 if (contiguous) { 2916 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2917 } else { 2918 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2919 } 2920 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2921 /* clean up */ 2922 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2923 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2924 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2925 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2926 } 2927 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2928 2929 /* assemble parallel IS for sends */ 2930 i = 1; 2931 if (color) i=0; 2932 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2933 2934 /* get back IS */ 2935 *is_sends = ranks_send_to; 2936 PetscFunctionReturn(0); 2937 } 2938 2939 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2940 2941 #undef __FUNCT__ 2942 #define __FUNCT__ "MatISSubassemble" 2943 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2944 { 2945 Mat local_mat; 2946 Mat_IS *matis; 2947 IS is_sends_internal; 2948 PetscInt rows,cols; 2949 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2950 PetscBool ismatis,isdense,destroy_mat; 2951 ISLocalToGlobalMapping l2gmap; 2952 PetscInt* l2gmap_indices; 2953 const PetscInt* is_indices; 2954 MatType new_local_type; 2955 /* buffers */ 2956 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2957 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2958 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2959 /* MPI */ 2960 MPI_Comm comm,comm_n; 2961 PetscSubcomm subcomm; 2962 PetscMPIInt n_sends,n_recvs,commsize; 2963 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2964 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2965 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2966 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2967 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2968 PetscErrorCode ierr; 2969 2970 PetscFunctionBegin; 2971 /* TODO: add missing checks */ 2972 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2973 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2974 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2975 PetscValidLogicalCollectiveInt(mat,nis,7); 2976 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2977 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2978 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2979 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2980 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2981 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2982 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2983 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2984 PetscInt mrows,mcols,mnrows,mncols; 2985 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2986 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2987 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2988 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2989 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2990 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2991 } 2992 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2993 PetscValidLogicalCollectiveInt(mat,bs,0); 2994 /* prepare IS for sending if not provided */ 2995 if (!is_sends) { 2996 PetscBool pcontig = PETSC_TRUE; 2997 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2998 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2999 } else { 3000 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3001 is_sends_internal = is_sends; 3002 } 3003 3004 /* get pointer of MATIS data */ 3005 matis = (Mat_IS*)mat->data; 3006 3007 /* get comm */ 3008 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3009 3010 /* compute number of sends */ 3011 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3012 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3013 3014 /* compute number of receives */ 3015 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3016 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3017 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3018 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3019 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3020 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3021 ierr = PetscFree(iflags);CHKERRQ(ierr); 3022 3023 /* restrict comm if requested */ 3024 subcomm = 0; 3025 destroy_mat = PETSC_FALSE; 3026 if (restrict_comm) { 3027 PetscMPIInt color,rank,subcommsize; 3028 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3029 color = 0; 3030 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3031 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3032 subcommsize = commsize - subcommsize; 3033 /* check if reuse has been requested */ 3034 if (reuse == MAT_REUSE_MATRIX) { 3035 if (*mat_n) { 3036 PetscMPIInt subcommsize2; 3037 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3038 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3039 comm_n = PetscObjectComm((PetscObject)*mat_n); 3040 } else { 3041 comm_n = PETSC_COMM_SELF; 3042 } 3043 } else { /* MAT_INITIAL_MATRIX */ 3044 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3045 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3046 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3047 comm_n = subcomm->comm; 3048 } 3049 /* flag to destroy *mat_n if not significative */ 3050 if (color) destroy_mat = PETSC_TRUE; 3051 } else { 3052 comm_n = comm; 3053 } 3054 3055 /* prepare send/receive buffers */ 3056 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3057 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3058 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3059 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3060 if (nis) { 3061 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3062 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3063 } 3064 3065 /* Get data from local matrices */ 3066 if (!isdense) { 3067 /* TODO: See below some guidelines on how to prepare the local buffers */ 3068 /* 3069 send_buffer_vals should contain the raw values of the local matrix 3070 send_buffer_idxs should contain: 3071 - MatType_PRIVATE type 3072 - PetscInt size_of_l2gmap 3073 - PetscInt global_row_indices[size_of_l2gmap] 3074 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3075 */ 3076 } else { 3077 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3078 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3079 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 3080 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3081 send_buffer_idxs[1] = i; 3082 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3083 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3084 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3085 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3086 for (i=0;i<n_sends;i++) { 3087 ilengths_vals[is_indices[i]] = len*len; 3088 ilengths_idxs[is_indices[i]] = len+2; 3089 } 3090 } 3091 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3092 /* additional is (if any) */ 3093 if (nis) { 3094 PetscMPIInt psum; 3095 PetscInt j; 3096 for (j=0,psum=0;j<nis;j++) { 3097 PetscInt plen; 3098 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3099 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3100 psum += len+1; /* indices + lenght */ 3101 } 3102 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3103 for (j=0,psum=0;j<nis;j++) { 3104 PetscInt plen; 3105 const PetscInt *is_array_idxs; 3106 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3107 send_buffer_idxs_is[psum] = plen; 3108 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3109 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3110 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3111 psum += plen+1; /* indices + lenght */ 3112 } 3113 for (i=0;i<n_sends;i++) { 3114 ilengths_idxs_is[is_indices[i]] = psum; 3115 } 3116 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3117 } 3118 3119 buf_size_idxs = 0; 3120 buf_size_vals = 0; 3121 buf_size_idxs_is = 0; 3122 for (i=0;i<n_recvs;i++) { 3123 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3124 buf_size_vals += (PetscInt)olengths_vals[i]; 3125 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3126 } 3127 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3128 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3129 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3130 3131 /* get new tags for clean communications */ 3132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3133 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3134 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3135 3136 /* allocate for requests */ 3137 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3138 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3139 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3140 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3141 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3142 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3143 3144 /* communications */ 3145 ptr_idxs = recv_buffer_idxs; 3146 ptr_vals = recv_buffer_vals; 3147 ptr_idxs_is = recv_buffer_idxs_is; 3148 for (i=0;i<n_recvs;i++) { 3149 source_dest = onodes[i]; 3150 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3151 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3152 ptr_idxs += olengths_idxs[i]; 3153 ptr_vals += olengths_vals[i]; 3154 if (nis) { 3155 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3156 ptr_idxs_is += olengths_idxs_is[i]; 3157 } 3158 } 3159 for (i=0;i<n_sends;i++) { 3160 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3161 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3162 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3163 if (nis) { 3164 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3165 } 3166 } 3167 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3168 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3169 3170 /* assemble new l2g map */ 3171 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3172 ptr_idxs = recv_buffer_idxs; 3173 buf_size_idxs = 0; 3174 for (i=0;i<n_recvs;i++) { 3175 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3176 ptr_idxs += olengths_idxs[i]; 3177 } 3178 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 3179 ptr_idxs = recv_buffer_idxs; 3180 buf_size_idxs = 0; 3181 for (i=0;i<n_recvs;i++) { 3182 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3183 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3184 ptr_idxs += olengths_idxs[i]; 3185 } 3186 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3187 ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3188 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3189 3190 /* infer new local matrix type from received local matrices type */ 3191 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3192 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3193 if (n_recvs) { 3194 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3195 ptr_idxs = recv_buffer_idxs; 3196 for (i=0;i<n_recvs;i++) { 3197 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3198 new_local_type_private = MATAIJ_PRIVATE; 3199 break; 3200 } 3201 ptr_idxs += olengths_idxs[i]; 3202 } 3203 switch (new_local_type_private) { 3204 case MATDENSE_PRIVATE: 3205 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3206 new_local_type = MATSEQAIJ; 3207 bs = 1; 3208 } else { /* if I receive only 1 dense matrix */ 3209 new_local_type = MATSEQDENSE; 3210 bs = 1; 3211 } 3212 break; 3213 case MATAIJ_PRIVATE: 3214 new_local_type = MATSEQAIJ; 3215 bs = 1; 3216 break; 3217 case MATBAIJ_PRIVATE: 3218 new_local_type = MATSEQBAIJ; 3219 break; 3220 case MATSBAIJ_PRIVATE: 3221 new_local_type = MATSEQSBAIJ; 3222 break; 3223 default: 3224 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3225 break; 3226 } 3227 } else { /* by default, new_local_type is seqdense */ 3228 new_local_type = MATSEQDENSE; 3229 bs = 1; 3230 } 3231 3232 /* create MATIS object if needed */ 3233 if (reuse == MAT_INITIAL_MATRIX) { 3234 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3235 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3236 } else { 3237 /* it also destroys the local matrices */ 3238 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3239 } 3240 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3241 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3242 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3243 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3244 3245 /* set values */ 3246 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3247 ptr_vals = recv_buffer_vals; 3248 ptr_idxs = recv_buffer_idxs; 3249 for (i=0;i<n_recvs;i++) { 3250 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3251 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3252 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3253 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3254 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3255 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3256 } else { 3257 /* TODO */ 3258 } 3259 ptr_idxs += olengths_idxs[i]; 3260 ptr_vals += olengths_vals[i]; 3261 } 3262 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3263 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3266 3267 #if 0 3268 if (!restrict_comm) { /* check */ 3269 Vec lvec,rvec; 3270 PetscReal infty_error; 3271 3272 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3273 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3274 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3275 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3276 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3277 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3278 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3279 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3280 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3281 } 3282 #endif 3283 3284 /* assemble new additional is (if any) */ 3285 if (nis) { 3286 PetscInt **temp_idxs,*count_is,j,psum; 3287 3288 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3289 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3290 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3291 ptr_idxs = recv_buffer_idxs_is; 3292 psum = 0; 3293 for (i=0;i<n_recvs;i++) { 3294 for (j=0;j<nis;j++) { 3295 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3296 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3297 psum += plen; 3298 ptr_idxs += plen+1; /* shift pointer to received data */ 3299 } 3300 } 3301 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3302 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3303 for (i=1;i<nis;i++) { 3304 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3305 } 3306 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3307 ptr_idxs = recv_buffer_idxs_is; 3308 for (i=0;i<n_recvs;i++) { 3309 for (j=0;j<nis;j++) { 3310 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3311 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3312 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3313 ptr_idxs += plen+1; /* shift pointer to received data */ 3314 } 3315 } 3316 for (i=0;i<nis;i++) { 3317 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3318 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3319 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3320 } 3321 ierr = PetscFree(count_is);CHKERRQ(ierr); 3322 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3323 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3324 } 3325 /* free workspace */ 3326 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3327 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3328 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3329 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3330 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3331 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3332 if (isdense) { 3333 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3334 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3335 } else { 3336 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3337 } 3338 if (nis) { 3339 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3340 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3341 } 3342 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3343 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3344 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3345 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3346 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3347 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3348 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3349 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3350 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3351 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3352 ierr = PetscFree(onodes);CHKERRQ(ierr); 3353 if (nis) { 3354 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3355 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3356 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3357 } 3358 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3359 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3360 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3361 for (i=0;i<nis;i++) { 3362 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3363 } 3364 } 3365 PetscFunctionReturn(0); 3366 } 3367 3368 /* temporary hack into ksp private data structure */ 3369 #include <petsc-private/kspimpl.h> 3370 3371 #undef __FUNCT__ 3372 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3373 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3374 { 3375 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3376 PC_IS *pcis = (PC_IS*)pc->data; 3377 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3378 MatNullSpace CoarseNullSpace=NULL; 3379 ISLocalToGlobalMapping coarse_islg; 3380 IS coarse_is,*isarray; 3381 PetscInt i,im_active=-1,active_procs=-1; 3382 PetscInt nis,nisdofs,nisneu; 3383 PC pc_temp; 3384 PCType coarse_pc_type; 3385 KSPType coarse_ksp_type; 3386 PetscBool multilevel_requested,multilevel_allowed; 3387 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3388 Mat t_coarse_mat_is; 3389 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3390 PetscMPIInt all_procs; 3391 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3392 PetscBool compute_vecs = PETSC_FALSE; 3393 PetscErrorCode ierr; 3394 3395 PetscFunctionBegin; 3396 /* Assign global numbering to coarse dofs */ 3397 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3398 compute_vecs = PETSC_TRUE; 3399 PetscInt ocoarse_size; 3400 ocoarse_size = pcbddc->coarse_size; 3401 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3402 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3403 /* see if we can avoid some work */ 3404 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3405 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3406 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3407 coarse_reuse = PETSC_FALSE; 3408 } else { /* we can safely reuse already computed coarse matrix */ 3409 coarse_reuse = PETSC_TRUE; 3410 } 3411 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3412 coarse_reuse = PETSC_FALSE; 3413 } 3414 /* reset any subassembling information */ 3415 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3416 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3417 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3418 coarse_reuse = PETSC_TRUE; 3419 } 3420 3421 /* count "active" (i.e. with positive local size) and "void" processes */ 3422 im_active = !!(pcis->n); 3423 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3424 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3425 void_procs = all_procs-active_procs; 3426 csin_type_simple = PETSC_TRUE; 3427 if (pcbddc->current_level) { 3428 csin_ml = PETSC_TRUE; 3429 ncoarse_ml = void_procs; 3430 csin_ds = PETSC_TRUE; 3431 ncoarse_ds = void_procs; 3432 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3433 } else { 3434 csin_ml = PETSC_FALSE; 3435 ncoarse_ml = all_procs; 3436 if (void_procs) { 3437 csin_ds = PETSC_TRUE; 3438 ncoarse_ds = void_procs; 3439 csin_type_simple = PETSC_FALSE; 3440 } else { 3441 csin_ds = PETSC_FALSE; 3442 ncoarse_ds = all_procs; 3443 } 3444 } 3445 3446 /* 3447 test if we can go multilevel: three conditions must be satisfied: 3448 - we have not exceeded the number of levels requested 3449 - we can actually subassemble the active processes 3450 - we can find a suitable number of MPI processes where we can place the subassembled problem 3451 */ 3452 multilevel_allowed = PETSC_FALSE; 3453 multilevel_requested = PETSC_FALSE; 3454 if (pcbddc->current_level < pcbddc->max_levels) { 3455 multilevel_requested = PETSC_TRUE; 3456 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3457 multilevel_allowed = PETSC_FALSE; 3458 } else { 3459 multilevel_allowed = PETSC_TRUE; 3460 } 3461 } 3462 /* determine number of process partecipating to coarse solver */ 3463 if (multilevel_allowed) { 3464 ncoarse = ncoarse_ml; 3465 csin = csin_ml; 3466 } else { 3467 ncoarse = ncoarse_ds; 3468 csin = csin_ds; 3469 } 3470 3471 /* creates temporary l2gmap and IS for coarse indexes */ 3472 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3473 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3474 3475 /* creates temporary MATIS object for coarse matrix */ 3476 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3477 #if 0 3478 { 3479 PetscViewer viewer; 3480 char filename[256]; 3481 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3482 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3483 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3484 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3485 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3486 } 3487 #endif 3488 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3489 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3490 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3491 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3492 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3493 3494 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3495 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3496 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3497 const PetscInt *idxs; 3498 ISLocalToGlobalMapping tmap; 3499 3500 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3501 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3502 /* allocate space for temporary storage */ 3503 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3504 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3505 /* allocate for IS array */ 3506 nisdofs = pcbddc->n_ISForDofsLocal; 3507 nisneu = !!pcbddc->NeumannBoundariesLocal; 3508 nis = nisdofs + nisneu; 3509 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3510 /* dofs splitting */ 3511 for (i=0;i<nisdofs;i++) { 3512 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3513 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3514 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3515 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3516 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3517 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3518 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3519 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3520 } 3521 /* neumann boundaries */ 3522 if (pcbddc->NeumannBoundariesLocal) { 3523 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3524 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3525 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3526 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3527 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3528 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3529 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3530 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3531 } 3532 /* free memory */ 3533 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3534 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3535 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3536 } else { 3537 nis = 0; 3538 nisdofs = 0; 3539 nisneu = 0; 3540 isarray = NULL; 3541 } 3542 /* destroy no longer needed map */ 3543 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3544 3545 /* restrict on coarse candidates (if needed) */ 3546 coarse_mat_is = NULL; 3547 if (csin) { 3548 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3549 PetscInt j,tissize,*nisindices; 3550 PetscInt *coarse_candidates; 3551 const PetscInt* tisindices; 3552 /* get coarse candidates' ranks in pc communicator */ 3553 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3554 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3555 for (i=0,j=0;i<all_procs;i++) { 3556 if (!coarse_candidates[i]) { 3557 coarse_candidates[j]=i; 3558 j++; 3559 } 3560 } 3561 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3562 /* get a suitable subassembling pattern */ 3563 if (csin_type_simple) { 3564 PetscMPIInt rank; 3565 PetscInt issize,isidx; 3566 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3567 if (im_active) { 3568 issize = 1; 3569 isidx = (PetscInt)rank; 3570 } else { 3571 issize = 0; 3572 isidx = -1; 3573 } 3574 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3575 } else { 3576 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3577 } 3578 if (pcbddc->dbg_flag) { 3579 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3580 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3581 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3582 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3583 for (i=0;i<j;i++) { 3584 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3585 } 3586 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3587 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3588 } 3589 /* shift the pattern on coarse candidates */ 3590 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3591 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3592 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3593 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3594 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3595 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3596 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3597 } 3598 if (pcbddc->dbg_flag) { 3599 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3600 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3601 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3602 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3603 } 3604 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3605 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3606 } else { 3607 if (pcbddc->dbg_flag) { 3608 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3609 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3610 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3611 } 3612 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3613 coarse_mat_is = t_coarse_mat_is; 3614 } 3615 3616 /* create local to global scatters for coarse problem */ 3617 if (compute_vecs) { 3618 PetscInt lrows; 3619 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3620 if (coarse_mat_is) { 3621 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3622 } else { 3623 lrows = 0; 3624 } 3625 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3626 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3627 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3628 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3629 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3630 } 3631 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3632 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3633 3634 /* set defaults for coarse KSP and PC */ 3635 if (multilevel_allowed) { 3636 coarse_ksp_type = KSPRICHARDSON; 3637 coarse_pc_type = PCBDDC; 3638 } else { 3639 coarse_ksp_type = KSPPREONLY; 3640 coarse_pc_type = PCREDUNDANT; 3641 } 3642 3643 /* print some info if requested */ 3644 if (pcbddc->dbg_flag) { 3645 if (!multilevel_allowed) { 3646 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3647 if (multilevel_requested) { 3648 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3649 } else if (pcbddc->max_levels) { 3650 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3651 } 3652 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3653 } 3654 } 3655 3656 /* create the coarse KSP object only once with defaults */ 3657 if (coarse_mat_is) { 3658 MatReuse coarse_mat_reuse; 3659 PetscViewer dbg_viewer = NULL; 3660 if (pcbddc->dbg_flag) { 3661 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3662 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3663 } 3664 if (!pcbddc->coarse_ksp) { 3665 char prefix[256],str_level[16]; 3666 size_t len; 3667 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3668 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3669 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3670 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3671 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3672 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3673 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3674 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3675 /* prefix */ 3676 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3677 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3678 if (!pcbddc->current_level) { 3679 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3680 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3681 } else { 3682 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3683 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3684 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3685 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3686 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3687 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3688 } 3689 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3690 /* allow user customization */ 3691 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3692 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3693 } 3694 3695 /* get some info after set from options */ 3696 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3697 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3698 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3699 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3700 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3701 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3702 isbddc = PETSC_FALSE; 3703 } 3704 if (isredundant) { 3705 KSP inner_ksp; 3706 PC inner_pc; 3707 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3708 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3709 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3710 } 3711 3712 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3713 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3714 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3715 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3716 if (nisdofs) { 3717 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3718 for (i=0;i<nisdofs;i++) { 3719 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3720 } 3721 } 3722 if (nisneu) { 3723 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3724 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3725 } 3726 3727 /* assemble coarse matrix */ 3728 if (coarse_reuse) { 3729 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3730 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3731 coarse_mat_reuse = MAT_REUSE_MATRIX; 3732 } else { 3733 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3734 } 3735 if (isbddc || isnn) { 3736 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3737 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3738 if (pcbddc->dbg_flag) { 3739 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3740 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3741 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3742 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3743 } 3744 } 3745 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3746 } else { 3747 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3748 } 3749 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3750 3751 /* propagate symmetry info to coarse matrix */ 3752 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3753 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3754 3755 /* set operators */ 3756 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3757 if (pcbddc->dbg_flag) { 3758 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3759 } 3760 } else { /* processes non partecipating to coarse solver (if any) */ 3761 coarse_mat = 0; 3762 } 3763 ierr = PetscFree(isarray);CHKERRQ(ierr); 3764 #if 0 3765 { 3766 PetscViewer viewer; 3767 char filename[256]; 3768 sprintf(filename,"coarse_mat.m"); 3769 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3770 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3771 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3772 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3773 } 3774 #endif 3775 3776 /* Compute coarse null space (special handling by BDDC only) */ 3777 if (pcbddc->NullSpace) { 3778 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3779 } 3780 3781 if (pcbddc->coarse_ksp) { 3782 Vec crhs,csol; 3783 PetscBool ispreonly; 3784 if (CoarseNullSpace) { 3785 if (isbddc) { 3786 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3787 } else { 3788 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3789 } 3790 } 3791 /* setup coarse ksp */ 3792 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3793 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3794 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3795 /* hack */ 3796 if (!csol) { 3797 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3798 } 3799 if (!crhs) { 3800 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3801 } 3802 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3803 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3804 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3805 KSP check_ksp; 3806 KSPType check_ksp_type; 3807 PC check_pc; 3808 Vec check_vec,coarse_vec; 3809 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3810 PetscInt its; 3811 PetscBool compute_eigs; 3812 PetscReal *eigs_r,*eigs_c; 3813 PetscInt neigs; 3814 const char *prefix; 3815 3816 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3817 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3818 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3819 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3820 if (ispreonly) { 3821 check_ksp_type = KSPPREONLY; 3822 compute_eigs = PETSC_FALSE; 3823 } else { 3824 check_ksp_type = KSPGMRES; 3825 compute_eigs = PETSC_TRUE; 3826 } 3827 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3828 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3829 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3830 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3831 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3832 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3833 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3834 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3835 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3836 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3837 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3838 /* create random vec */ 3839 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3840 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3841 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3842 if (CoarseNullSpace) { 3843 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3844 } 3845 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3846 /* solve coarse problem */ 3847 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3848 if (CoarseNullSpace) { 3849 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3850 } 3851 /* set eigenvalue estimation if preonly has not been requested */ 3852 if (compute_eigs) { 3853 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3854 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3855 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3856 lambda_max = eigs_r[neigs-1]; 3857 lambda_min = eigs_r[0]; 3858 if (pcbddc->use_coarse_estimates) { 3859 if (lambda_max>lambda_min) { 3860 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3861 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3862 } 3863 } 3864 } 3865 3866 /* check coarse problem residual error */ 3867 if (pcbddc->dbg_flag) { 3868 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3869 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3870 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3871 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3872 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3873 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3874 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3875 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3876 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3877 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3878 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3879 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3880 if (compute_eigs) { 3881 PetscReal lambda_max_s,lambda_min_s; 3882 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3883 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3884 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3885 for (i=0;i<neigs;i++) { 3886 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3887 } 3888 } 3889 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3890 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3891 } 3892 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3893 if (compute_eigs) { 3894 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3895 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3896 } 3897 } 3898 } 3899 /* print additional info */ 3900 if (pcbddc->dbg_flag) { 3901 /* waits until all processes reaches this point */ 3902 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3903 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3904 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3905 } 3906 3907 /* free memory */ 3908 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3909 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3910 PetscFunctionReturn(0); 3911 } 3912 3913 #undef __FUNCT__ 3914 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3915 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3916 { 3917 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3918 PC_IS* pcis = (PC_IS*)pc->data; 3919 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3920 PetscInt i,coarse_size; 3921 PetscInt *local_primal_indices; 3922 PetscErrorCode ierr; 3923 3924 PetscFunctionBegin; 3925 /* Compute global number of coarse dofs */ 3926 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3927 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3928 } 3929 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3930 3931 /* check numbering */ 3932 if (pcbddc->dbg_flag) { 3933 PetscScalar coarsesum,*array; 3934 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 3935 3936 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3937 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3938 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3939 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3940 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3941 for (i=0;i<pcbddc->local_primal_size;i++) { 3942 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3943 } 3944 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3945 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3946 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3947 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3948 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3949 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3950 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3951 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3952 for (i=0;i<pcis->n;i++) { 3953 if (array[i] == 1.0) { 3954 set_error = PETSC_TRUE; 3955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3956 } 3957 } 3958 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3960 for (i=0;i<pcis->n;i++) { 3961 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3962 } 3963 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3964 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3965 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3966 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3967 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3968 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3969 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 3970 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3973 for (i=0;i<pcbddc->local_primal_size;i++) { 3974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3975 } 3976 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3977 } 3978 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3979 if (set_error_reduced) { 3980 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 3981 } 3982 } 3983 /* get back data */ 3984 *coarse_size_n = coarse_size; 3985 *local_primal_indices_n = local_primal_indices; 3986 PetscFunctionReturn(0); 3987 } 3988 3989 #undef __FUNCT__ 3990 #define __FUNCT__ "PCBDDCGlobalToLocal" 3991 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 3992 { 3993 IS localis_t; 3994 PetscInt i,lsize,*idxs,n; 3995 PetscScalar *vals; 3996 PetscErrorCode ierr; 3997 3998 PetscFunctionBegin; 3999 /* get indices in local ordering exploiting local to global map */ 4000 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4001 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 4002 for (i=0;i<lsize;i++) vals[i] = 1.0; 4003 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4004 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4005 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4006 if (idxs) { /* multilevel guard */ 4007 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4008 } 4009 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4010 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4011 ierr = PetscFree(vals);CHKERRQ(ierr); 4012 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4013 /* now compute set in local ordering */ 4014 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4015 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4016 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4017 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4018 for (i=0,lsize=0;i<n;i++) { 4019 if (PetscRealPart(vals[i]) > 0.5) { 4020 lsize++; 4021 } 4022 } 4023 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 4024 for (i=0,lsize=0;i<n;i++) { 4025 if (PetscRealPart(vals[i]) > 0.5) { 4026 idxs[lsize++] = i; 4027 } 4028 } 4029 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4030 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4031 *localis = localis_t; 4032 PetscFunctionReturn(0); 4033 } 4034 4035 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4036 #undef __FUNCT__ 4037 #define __FUNCT__ "PCBDDCMatMult_Private" 4038 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4039 { 4040 PCBDDCChange_ctx change_ctx; 4041 PetscErrorCode ierr; 4042 4043 PetscFunctionBegin; 4044 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4045 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4046 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4047 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4048 PetscFunctionReturn(0); 4049 } 4050 4051 #undef __FUNCT__ 4052 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4053 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4054 { 4055 PCBDDCChange_ctx change_ctx; 4056 PetscErrorCode ierr; 4057 4058 PetscFunctionBegin; 4059 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4060 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4061 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4062 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4063 PetscFunctionReturn(0); 4064 } 4065