1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 104 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 117 { 118 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 119 PC_IS *pcis = (PC_IS*)pc->data; 120 VecType impVecType; 121 PetscInt n_constraints,n_R,old_size; 122 PetscErrorCode ierr; 123 124 PetscFunctionBegin; 125 if (!pcbddc->ConstraintMatrix) { 126 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 127 } 128 /* get sizes */ 129 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 130 n_R = pcis->n-pcbddc->n_actual_vertices; 131 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 132 /* local work vectors (try to avoid unneeded work)*/ 133 /* R nodes */ 134 old_size = -1; 135 if (pcbddc->vec1_R) { 136 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 137 } 138 if (n_R != old_size) { 139 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 140 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 141 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 143 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 144 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 145 } 146 /* local primal dofs */ 147 old_size = -1; 148 if (pcbddc->vec1_P) { 149 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 150 } 151 if (pcbddc->local_primal_size != old_size) { 152 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 153 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 155 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 156 } 157 /* local explicit constraints */ 158 old_size = -1; 159 if (pcbddc->vec1_C) { 160 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 161 } 162 if (n_constraints && n_constraints != old_size) { 163 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 164 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 166 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNCT__ 172 #define __FUNCT__ "PCBDDCSetUpCorrection" 173 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 174 { 175 PetscErrorCode ierr; 176 /* pointers to pcis and pcbddc */ 177 PC_IS* pcis = (PC_IS*)pc->data; 178 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 179 /* submatrices of local problem */ 180 Mat A_RV,A_VR,A_VV; 181 /* working matrices */ 182 Mat M1,M2,M3,C_CR; 183 /* working vectors */ 184 Vec vec1_C,vec2_C,vec1_V,vec2_V; 185 /* additional working stuff */ 186 IS is_aux; 187 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 188 const PetscScalar *array,*row_cmat_values; 189 const PetscInt *row_cmat_indices,*idx_R_local; 190 PetscInt *idx_V_B,*auxindices; 191 PetscInt n_vertices,n_constraints,size_of_constraint; 192 PetscInt i,j,n_R,n_D,n_B; 193 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 194 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 195 MatType impMatType; 196 /* some shortcuts to scalars */ 197 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 198 /* for debugging purposes */ 199 PetscReal *coarsefunctions_errors,*constraints_errors; 200 201 PetscFunctionBegin; 202 /* get number of vertices (corners plus constraints with change of basis) 203 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 204 n_vertices = pcbddc->n_actual_vertices; 205 n_constraints = pcbddc->local_primal_size-n_vertices; 206 /* Set Non-overlapping dimensions */ 207 n_B = pcis->n_B; n_D = pcis->n - n_B; 208 n_R = pcis->n-n_vertices; 209 210 /* Set types for local objects needed by BDDC precondtioner */ 211 impMatType = MATSEQDENSE; 212 213 /* Allocating some extra storage just to be safe */ 214 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 215 for (i=0;i<pcis->n;i++) auxindices[i]=i; 216 217 /* vertices in boundary numbering */ 218 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 219 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 220 if (i != n_vertices) { 221 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 222 } 223 224 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 225 if (n_constraints) { 226 /* see if we can save some allocations */ 227 if (pcbddc->local_auxmat2) { 228 PetscInt on_R,on_constraints; 229 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 230 if (on_R != n_R || on_constraints != n_constraints) { 231 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 232 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 233 } 234 } 235 /* work vectors */ 236 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 238 /* auxiliary matrices */ 239 if (!pcbddc->local_auxmat2) { 240 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 241 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 242 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 243 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 244 } 245 246 /* Extract constraints on R nodes: C_{CR} */ 247 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 248 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 249 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 250 251 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 252 for (i=0;i<n_constraints;i++) { 253 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 254 /* Get row of constraint matrix in R numbering */ 255 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 256 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 257 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 259 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 260 /* Solve for row of constraint matrix in R numbering */ 261 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 262 /* Set values in local_auxmat2 */ 263 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 264 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 265 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 } 267 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 268 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 270 271 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 272 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 273 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 274 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 275 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 276 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 277 ierr = MatSetUp(M1);CHKERRQ(ierr); 278 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 279 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 280 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 281 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 282 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 283 ierr = MatDestroy(&M2);CHKERRQ(ierr); 284 ierr = MatDestroy(&M3);CHKERRQ(ierr); 285 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 286 if (!pcbddc->local_auxmat1) { 287 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 288 } else { 289 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } 291 } 292 293 /* Get submatrices from subdomain matrix */ 294 if (n_vertices) { 295 PetscInt ibs,mbs; 296 PetscBool issbaij; 297 Mat newmat; 298 299 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 300 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 301 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 302 if (ibs != mbs) { /* need to convert to SEQAIJ */ 303 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 304 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 307 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 308 } else { 309 /* this is safe */ 310 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 311 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 312 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 313 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 314 /* which of the two approaches is faster? */ 315 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 316 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 317 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 319 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 320 } else { 321 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 322 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 } 324 } 325 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 326 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 327 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 328 } 329 330 /* Matrix of coarse basis functions (local) */ 331 if (pcbddc->coarse_phi_B) { 332 PetscInt on_B,on_primal; 333 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 334 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 335 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 336 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 337 } 338 } 339 if (pcbddc->coarse_phi_D) { 340 PetscInt on_D,on_primal; 341 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 342 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 343 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 344 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 345 } 346 } 347 if (!pcbddc->coarse_phi_B) { 348 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 349 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 350 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 351 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 352 } 353 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 354 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 355 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 356 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 357 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 358 } 359 360 if (pcbddc->dbg_flag) { 361 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 362 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 363 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 364 } 365 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 366 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 367 368 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 369 370 /* vertices */ 371 for (i=0;i<n_vertices;i++) { 372 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 373 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 374 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 375 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 376 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 378 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 379 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 380 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 382 if (n_constraints) { 383 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 384 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 386 } 387 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 388 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 389 390 /* Set values in coarse basis function and subdomain part of coarse_mat */ 391 /* coarse basis functions */ 392 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 393 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 394 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 396 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 397 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 399 if (pcbddc->switch_static || pcbddc->dbg_flag) { 400 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 401 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 403 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 } 406 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 407 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 408 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 if (n_constraints) { 411 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 412 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 } 415 416 /* check */ 417 if (pcbddc->dbg_flag) { 418 /* assemble subdomain vector on local nodes */ 419 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 420 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 421 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 425 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 426 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 427 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 428 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 429 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 430 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 if (n_constraints) { 432 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 433 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 } 436 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 439 /* check saddle point solution */ 440 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 441 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 443 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 444 /* shift by the identity matrix */ 445 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 449 } 450 } 451 452 /* constraints */ 453 for (i=0;i<n_constraints;i++) { 454 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 455 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 456 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 457 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 458 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 459 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 460 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 461 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 462 if (n_vertices) { 463 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 464 } 465 /* Set values in coarse basis function and subdomain part of coarse_mat */ 466 /* coarse basis functions */ 467 j = i+n_vertices; /* don't touch this! */ 468 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 469 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 472 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 473 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 if (pcbddc->switch_static || pcbddc->dbg_flag) { 475 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 478 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 479 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 } 481 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 482 if (n_vertices) { 483 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 484 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 485 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 } 487 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 488 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 491 if (pcbddc->dbg_flag) { 492 /* assemble subdomain vector on nodes */ 493 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 494 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 495 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 499 /* assemble subdomain vector of lagrange multipliers */ 500 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 501 if (n_vertices) { 502 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 503 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 504 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 } 506 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 507 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 510 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 512 /* check saddle point solution */ 513 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 514 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 516 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 517 /* shift by the identity matrix */ 518 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 519 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 522 } 523 } 524 /* call assembling routines for local coarse basis */ 525 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 526 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 if (pcbddc->switch_static || pcbddc->dbg_flag) { 528 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 } 531 532 /* compute other basis functions for non-symmetric problems */ 533 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 534 if (!setsym || (setsym && !issym)) { 535 if (!pcbddc->coarse_psi_B) { 536 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 537 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 539 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 540 } 541 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 542 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 543 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 544 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 545 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 546 } 547 for (i=0;i<pcbddc->local_primal_size;i++) { 548 if (n_constraints) { 549 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 550 for (j=0;j<n_constraints;j++) { 551 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 552 } 553 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 554 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 555 } 556 if (i<n_vertices) { 557 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 558 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 559 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 560 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 561 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 562 if (n_constraints) { 563 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 564 } 565 } else { 566 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 567 } 568 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 569 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 570 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 573 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 574 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 if (i<n_vertices) { 576 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 577 } 578 if (pcbddc->switch_static || pcbddc->dbg_flag) { 579 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 580 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 582 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 583 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 } 585 586 if (pcbddc->dbg_flag) { 587 /* assemble subdomain vector on nodes */ 588 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 589 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 590 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 591 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 if (i<n_vertices) { 593 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 594 } 595 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 596 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 597 /* assemble subdomain vector of lagrange multipliers */ 598 for (j=0;j<pcbddc->local_primal_size;j++) { 599 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 600 } 601 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 602 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 603 /* check saddle point solution */ 604 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 605 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 607 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 608 /* shift by the identity matrix */ 609 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 610 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 611 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 613 } 614 } 615 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 616 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 if (pcbddc->switch_static || pcbddc->dbg_flag) { 618 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 } 621 unsymmetric_check = PETSC_TRUE; 622 } else { /* take references to already computed coarse basis */ 623 unsymmetric_check = PETSC_FALSE; 624 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 625 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 626 if (pcbddc->coarse_phi_D) { 627 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 628 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 629 } 630 } 631 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 632 /* Checking coarse_sub_mat and coarse basis functios */ 633 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 634 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 635 if (pcbddc->dbg_flag) { 636 Mat coarse_sub_mat; 637 Mat AUXMAT,TM1,TM2,TM3,TM4; 638 Mat coarse_phi_D,coarse_phi_B; 639 Mat coarse_psi_D,coarse_psi_B; 640 Mat A_II,A_BB,A_IB,A_BI; 641 MatType checkmattype=MATSEQAIJ; 642 PetscReal real_value; 643 644 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 645 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 646 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 648 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 649 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 650 if (unsymmetric_check) { 651 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 652 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 653 } 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 655 656 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 658 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 659 if (unsymmetric_check) { 660 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 661 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 662 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 663 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 664 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 665 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 666 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 667 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 668 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 669 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 670 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 672 } else { 673 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 674 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 675 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 676 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 677 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 678 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 679 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 680 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 681 } 682 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 683 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 684 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 686 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 687 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 688 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 693 for (i=0;i<pcbddc->local_primal_size;i++) { 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 695 } 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 697 for (i=0;i<pcbddc->local_primal_size;i++) { 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 699 } 700 if (unsymmetric_check) { 701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 702 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 704 } 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 706 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 708 } 709 } 710 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 711 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 712 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 715 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 716 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 719 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 720 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 721 if (unsymmetric_check) { 722 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 723 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 724 } 725 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 726 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 727 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 728 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 729 } 730 /* free memory */ 731 if (n_vertices) { 732 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 733 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 734 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 735 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 737 } 738 if (n_constraints) { 739 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 740 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 741 ierr = MatDestroy(&M1);CHKERRQ(ierr); 742 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 743 } 744 ierr = PetscFree(auxindices);CHKERRQ(ierr); 745 /* get back data */ 746 *coarse_submat_vals_n = coarse_submat_vals; 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 752 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 753 { 754 PC_IS* pcis = (PC_IS*)(pc->data); 755 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 756 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 757 PetscBool issbaij,isbaij; 758 /* manage repeated solves */ 759 MatReuse reuse; 760 MatStructure matstruct; 761 PetscErrorCode ierr; 762 763 PetscFunctionBegin; 764 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 765 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 766 } 767 /* get mat flags */ 768 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 769 reuse = MAT_INITIAL_MATRIX; 770 if (pc->setupcalled) { 771 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 772 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 773 if (matstruct == SAME_NONZERO_PATTERN) { 774 reuse = MAT_REUSE_MATRIX; 775 } else { 776 reuse = MAT_INITIAL_MATRIX; 777 } 778 } 779 if (reuse == MAT_INITIAL_MATRIX) { 780 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 783 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 784 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 785 } 786 787 /* transform local matrices if needed */ 788 if (pcbddc->use_change_of_basis) { 789 Mat change_mat_all; 790 PetscScalar *row_cmat_values; 791 PetscInt *row_cmat_indices; 792 PetscInt *nnz,*is_indices,*temp_indices; 793 PetscInt i,j,k,n_D,n_B; 794 795 /* Get Non-overlapping dimensions */ 796 n_B = pcis->n_B; 797 n_D = pcis->n-n_B; 798 799 /* compute nonzero structure of change of basis on all local nodes */ 800 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 801 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 803 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 804 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 805 k=1; 806 for (i=0;i<n_B;i++) { 807 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 808 nnz[is_indices[i]]=j; 809 if (k < j) k = j; 810 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 811 } 812 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 813 /* assemble change of basis matrix on the whole set of local dofs */ 814 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 815 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 816 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 817 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 818 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 819 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 820 for (i=0;i<n_D;i++) { 821 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 822 } 823 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 824 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 825 for (i=0;i<n_B;i++) { 826 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 827 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 828 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 829 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 830 } 831 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 832 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 833 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 834 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 835 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 836 if (!issbaij && !isbaij) { 837 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 838 } else { 839 Mat work_mat; 840 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 841 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 842 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 843 } 844 /* 845 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 846 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 847 */ 848 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 849 ierr = PetscFree(nnz);CHKERRQ(ierr); 850 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 851 } else { 852 /* without change of basis, the local matrix is unchanged */ 853 if (!pcbddc->local_mat) { 854 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 855 pcbddc->local_mat = matis->A; 856 } 857 } 858 859 /* get submatrices */ 860 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 862 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 863 if (!issbaij) { 864 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 865 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 866 } else { 867 Mat newmat; 868 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 869 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 870 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 871 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 872 } 873 PetscFunctionReturn(0); 874 } 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 878 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 879 { 880 PC_IS* pcis = (PC_IS*)(pc->data); 881 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 882 IS is_aux1,is_aux2; 883 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 884 PetscInt n_vertices,i,j,n_R,n_D,n_B; 885 PetscInt vbs,bs; 886 PetscBT bitmask; 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 /* 891 No need to setup local scatters if 892 - primal space is unchanged 893 AND 894 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 895 AND 896 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 897 */ 898 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 899 PetscFunctionReturn(0); 900 } 901 /* destroy old objects */ 902 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 903 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 904 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 905 /* Set Non-overlapping dimensions */ 906 n_B = pcis->n_B; n_D = pcis->n - n_B; 907 n_vertices = pcbddc->n_actual_vertices; 908 /* create auxiliary bitmask */ 909 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 910 for (i=0;i<n_vertices;i++) { 911 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 912 } 913 914 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 915 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 916 for (i=0, n_R=0; i<pcis->n; i++) { 917 if (!PetscBTLookup(bitmask,i)) { 918 idx_R_local[n_R] = i; 919 n_R++; 920 } 921 } 922 923 /* Block code */ 924 vbs = 1; 925 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 926 if (bs>1 && !(n_vertices%bs)) { 927 PetscBool is_blocked = PETSC_TRUE; 928 PetscInt *vary; 929 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 930 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 931 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 932 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 933 for (i=0; i<n_vertices; i++) { 934 if (vary[i]!=0 && vary[i]!=bs) { 935 is_blocked = PETSC_FALSE; 936 break; 937 } 938 } 939 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 940 vbs = bs; 941 for (i=0;i<n_R/vbs;i++) { 942 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 943 } 944 } 945 ierr = PetscFree(vary);CHKERRQ(ierr); 946 } 947 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 948 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 949 950 /* print some info if requested */ 951 if (pcbddc->dbg_flag) { 952 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 953 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 958 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 960 } 961 962 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 963 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 964 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 965 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 966 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 967 for (i=0; i<n_D; i++) { 968 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 969 } 970 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 971 for (i=0, j=0; i<n_R; i++) { 972 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 973 aux_array1[j++] = i; 974 } 975 } 976 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 977 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 978 for (i=0, j=0; i<n_B; i++) { 979 if (!PetscBTLookup(bitmask,is_indices[i])) { 980 aux_array2[j++] = i; 981 } 982 } 983 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 984 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 985 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 986 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 987 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 988 989 if (pcbddc->switch_static || pcbddc->dbg_flag) { 990 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 991 for (i=0, j=0; i<n_R; i++) { 992 if (PetscBTLookup(bitmask,idx_R_local[i])) { 993 aux_array1[j++] = i; 994 } 995 } 996 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 997 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 998 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 999 } 1000 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1001 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1008 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1009 { 1010 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1011 PC_IS *pcis = (PC_IS*)pc->data; 1012 PC pc_temp; 1013 Mat A_RR; 1014 MatStructure matstruct; 1015 MatReuse reuse; 1016 PetscScalar m_one = -1.0; 1017 PetscReal value; 1018 PetscInt n_D,n_R,ibs,mbs; 1019 PetscBool use_exact,use_exact_reduced,issbaij; 1020 PetscErrorCode ierr; 1021 /* prefixes stuff */ 1022 char dir_prefix[256],neu_prefix[256],str_level[3]; 1023 size_t len; 1024 1025 PetscFunctionBegin; 1026 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1027 1028 /* compute prefixes */ 1029 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1030 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1031 if (!pcbddc->current_level) { 1032 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1033 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1034 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1035 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1036 } else { 1037 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1038 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1039 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1040 len -= 15; /* remove "pc_bddc_coarse_" */ 1041 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1042 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1043 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1044 *(dir_prefix+len)='\0'; 1045 *(neu_prefix+len)='\0'; 1046 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1047 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1048 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1049 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1050 } 1051 1052 /* DIRICHLET PROBLEM */ 1053 /* Matrix for Dirichlet problem is pcis->A_II */ 1054 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1055 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1056 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1057 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1058 /* default */ 1059 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1060 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1061 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1062 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1063 if (issbaij) { 1064 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1065 } else { 1066 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1067 } 1068 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1069 } 1070 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1071 /* Allow user's customization */ 1072 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1073 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1074 if (!n_D) { 1075 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1076 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1077 } 1078 /* Set Up KSP for Dirichlet problem of BDDC */ 1079 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1080 /* set ksp_D into pcis data */ 1081 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1082 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1083 pcis->ksp_D = pcbddc->ksp_D; 1084 1085 /* NEUMANN PROBLEM */ 1086 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1087 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1088 if (pcbddc->ksp_R) { /* already created ksp */ 1089 PetscInt nn_R; 1090 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1091 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1092 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1093 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1094 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1095 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1096 reuse = MAT_INITIAL_MATRIX; 1097 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1098 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1099 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1100 reuse = MAT_INITIAL_MATRIX; 1101 } else { /* safe to reuse the matrix */ 1102 reuse = MAT_REUSE_MATRIX; 1103 } 1104 } 1105 /* last check */ 1106 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1107 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1108 reuse = MAT_INITIAL_MATRIX; 1109 } 1110 } else { /* first time, so we need to create the matrix */ 1111 reuse = MAT_INITIAL_MATRIX; 1112 } 1113 /* extract A_RR */ 1114 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1115 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1116 if (ibs != mbs) { 1117 Mat newmat; 1118 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1119 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1120 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1121 } else { 1122 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1123 } 1124 if (!pcbddc->ksp_R) { /* create object if not present */ 1125 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1126 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1127 /* default */ 1128 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1129 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1130 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1131 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1132 if (issbaij) { 1133 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1134 } else { 1135 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1136 } 1137 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1138 } 1139 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1140 /* Allow user's customization */ 1141 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1142 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1143 if (!n_R) { 1144 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1145 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1146 } 1147 /* Set Up KSP for Neumann problem of BDDC */ 1148 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1149 1150 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1151 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1152 /* Dirichlet */ 1153 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1154 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1155 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1156 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1157 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1158 /* need to be adapted? */ 1159 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1160 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1161 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1162 /* print info */ 1163 if (pcbddc->dbg_flag) { 1164 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1165 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1166 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1168 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1169 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1170 } 1171 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1172 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1173 } 1174 1175 /* Neumann */ 1176 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1177 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1178 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1179 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1180 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1181 /* need to be adapted? */ 1182 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1183 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1184 /* print info */ 1185 if (pcbddc->dbg_flag) { 1186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1187 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1188 } 1189 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1190 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1191 } 1192 } 1193 /* free Neumann problem's matrix */ 1194 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1200 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1201 { 1202 PetscErrorCode ierr; 1203 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1204 1205 PetscFunctionBegin; 1206 if (applytranspose) { 1207 if (pcbddc->local_auxmat1) { 1208 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1209 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1210 } 1211 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1212 } else { 1213 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1214 if (pcbddc->local_auxmat1) { 1215 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1216 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1217 } 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1225 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1226 { 1227 PetscErrorCode ierr; 1228 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1229 PC_IS* pcis = (PC_IS*) (pc->data); 1230 const PetscScalar zero = 0.0; 1231 1232 PetscFunctionBegin; 1233 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1234 if (applytranspose) { 1235 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1236 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1237 } else { 1238 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1239 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1240 } 1241 /* Scatter data of coarse_rhs */ 1242 ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 1243 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 1245 /* Local solution on R nodes */ 1246 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1247 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1248 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1249 if (pcbddc->switch_static) { 1250 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1251 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1252 } 1253 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1254 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1255 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1256 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1257 if (pcbddc->switch_static) { 1258 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1259 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 } 1261 1262 /* Coarse solution */ 1263 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 /* TODO remove null space when doing multilevel */ 1265 if (applytranspose) { 1266 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1267 } else { 1268 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1269 } 1270 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1271 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1272 1273 /* Sum contributions from two levels */ 1274 if (applytranspose) { 1275 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1276 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1277 } else { 1278 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1279 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1286 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1287 { 1288 PetscErrorCode ierr; 1289 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1290 1291 PetscFunctionBegin; 1292 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1298 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1299 { 1300 PetscErrorCode ierr; 1301 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1302 1303 PetscFunctionBegin; 1304 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 /* uncomment for testing purposes */ 1309 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1312 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1313 { 1314 PetscErrorCode ierr; 1315 PC_IS* pcis = (PC_IS*)(pc->data); 1316 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1317 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1318 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1319 MatType impMatType=MATSEQAIJ; 1320 /* one and zero */ 1321 PetscScalar one=1.0,zero=0.0; 1322 /* space to store constraints and their local indices */ 1323 PetscScalar *temp_quadrature_constraint; 1324 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1325 /* iterators */ 1326 PetscInt i,j,k,total_counts,temp_start_ptr; 1327 /* stuff to store connected components stored in pcbddc->mat_graph */ 1328 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1329 PetscInt n_ISForFaces,n_ISForEdges; 1330 /* near null space stuff */ 1331 MatNullSpace nearnullsp; 1332 const Vec *nearnullvecs; 1333 Vec *localnearnullsp; 1334 PetscBool nnsp_has_cnst; 1335 PetscInt nnsp_size; 1336 PetscScalar *array; 1337 /* BLAS integers */ 1338 PetscBLASInt lwork,lierr; 1339 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1340 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1341 /* LAPACK working arrays for SVD or POD */ 1342 PetscBool skip_lapack; 1343 PetscScalar *work; 1344 PetscReal *singular_vals; 1345 #if defined(PETSC_USE_COMPLEX) 1346 PetscReal *rwork; 1347 #endif 1348 #if defined(PETSC_MISSING_LAPACK_GESVD) 1349 PetscBLASInt Blas_one_2=1; 1350 PetscScalar *temp_basis,*correlation_mat; 1351 #else 1352 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1353 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1354 #endif 1355 /* reuse */ 1356 PetscInt olocal_primal_size; 1357 PetscInt *oprimal_indices_local_idxs; 1358 /* change of basis */ 1359 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1360 PetscBool boolforchange,qr_needed; 1361 PetscBT touched,change_basis,qr_needed_idx; 1362 /* auxiliary stuff */ 1363 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1364 /* some quantities */ 1365 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1366 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1367 1368 1369 PetscFunctionBegin; 1370 /* Destroy Mat objects computed previously */ 1371 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1372 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1373 /* Get index sets for faces, edges and vertices from graph */ 1374 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1375 pcbddc->use_vertices = PETSC_TRUE; 1376 } 1377 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1378 /* HACK: provide functions to set change of basis */ 1379 if (!ISForVertices && pcbddc->NullSpace) { 1380 pcbddc->use_change_of_basis = PETSC_TRUE; 1381 pcbddc->use_change_on_faces = PETSC_FALSE; 1382 } 1383 /* print some info */ 1384 if (pcbddc->dbg_flag) { 1385 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1386 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1387 i = 0; 1388 if (ISForVertices) { 1389 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1390 } 1391 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1392 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1393 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1394 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1395 } 1396 /* check if near null space is attached to global mat */ 1397 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1398 if (nearnullsp) { 1399 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1400 /* remove any stored info */ 1401 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1402 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1403 /* store information for BDDC solver reuse */ 1404 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1405 pcbddc->onearnullspace = nearnullsp; 1406 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1407 for (i=0;i<nnsp_size;i++) { 1408 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1409 } 1410 } else { /* if near null space is not provided BDDC uses constants by default */ 1411 nnsp_size = 0; 1412 nnsp_has_cnst = PETSC_TRUE; 1413 } 1414 /* get max number of constraints on a single cc */ 1415 max_constraints = nnsp_size; 1416 if (nnsp_has_cnst) max_constraints++; 1417 1418 /* 1419 Evaluate maximum storage size needed by the procedure 1420 - temp_indices will contain start index of each constraint stored as follows 1421 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1422 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1423 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1424 */ 1425 total_counts = n_ISForFaces+n_ISForEdges; 1426 total_counts *= max_constraints; 1427 n_vertices = 0; 1428 if (ISForVertices) { 1429 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1430 } 1431 total_counts += n_vertices; 1432 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1433 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1434 total_counts = 0; 1435 max_size_of_constraint = 0; 1436 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1437 if (i<n_ISForEdges) { 1438 used_IS = &ISForEdges[i]; 1439 } else { 1440 used_IS = &ISForFaces[i-n_ISForEdges]; 1441 } 1442 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1443 total_counts += j; 1444 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1445 } 1446 total_counts *= max_constraints; 1447 total_counts += n_vertices; 1448 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1449 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1450 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1451 /* get local part of global near null space vectors */ 1452 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1453 for (k=0;k<nnsp_size;k++) { 1454 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1455 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1456 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 } 1458 1459 /* whether or not to skip lapack calls */ 1460 skip_lapack = PETSC_TRUE; 1461 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1462 1463 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1464 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1465 PetscScalar temp_work; 1466 #if defined(PETSC_MISSING_LAPACK_GESVD) 1467 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1468 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1469 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1470 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1471 #if defined(PETSC_USE_COMPLEX) 1472 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1473 #endif 1474 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1475 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1476 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1477 lwork = -1; 1478 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1479 #if !defined(PETSC_USE_COMPLEX) 1480 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1481 #else 1482 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1483 #endif 1484 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1485 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1486 #else /* on missing GESVD */ 1487 /* SVD */ 1488 PetscInt max_n,min_n; 1489 max_n = max_size_of_constraint; 1490 min_n = max_constraints; 1491 if (max_size_of_constraint < max_constraints) { 1492 min_n = max_size_of_constraint; 1493 max_n = max_constraints; 1494 } 1495 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1496 #if defined(PETSC_USE_COMPLEX) 1497 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1498 #endif 1499 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1500 lwork = -1; 1501 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1502 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1503 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1504 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1505 #if !defined(PETSC_USE_COMPLEX) 1506 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1507 #else 1508 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1509 #endif 1510 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1511 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1512 #endif /* on missing GESVD */ 1513 /* Allocate optimal workspace */ 1514 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1515 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1516 } 1517 /* Now we can loop on constraining sets */ 1518 total_counts = 0; 1519 temp_indices[0] = 0; 1520 /* vertices */ 1521 if (ISForVertices) { 1522 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1523 if (nnsp_has_cnst) { /* consider all vertices */ 1524 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1525 for (i=0;i<n_vertices;i++) { 1526 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1527 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1528 total_counts++; 1529 } 1530 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1531 PetscBool used_vertex; 1532 for (i=0;i<n_vertices;i++) { 1533 used_vertex = PETSC_FALSE; 1534 k = 0; 1535 while (!used_vertex && k<nnsp_size) { 1536 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1537 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1538 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1539 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1540 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1541 total_counts++; 1542 used_vertex = PETSC_TRUE; 1543 } 1544 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1545 k++; 1546 } 1547 } 1548 } 1549 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1550 n_vertices = total_counts; 1551 } 1552 1553 /* edges and faces */ 1554 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1555 if (i<n_ISForEdges) { 1556 used_IS = &ISForEdges[i]; 1557 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1558 } else { 1559 used_IS = &ISForFaces[i-n_ISForEdges]; 1560 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1561 } 1562 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1563 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1564 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1565 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1566 /* change of basis should not be performed on local periodic nodes */ 1567 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1568 if (nnsp_has_cnst) { 1569 PetscScalar quad_value; 1570 temp_constraints++; 1571 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1572 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1573 for (j=0;j<size_of_constraint;j++) { 1574 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1575 } 1576 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1577 total_counts++; 1578 } 1579 for (k=0;k<nnsp_size;k++) { 1580 PetscReal real_value; 1581 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1582 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1583 for (j=0;j<size_of_constraint;j++) { 1584 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1585 } 1586 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1587 /* check if array is null on the connected component */ 1588 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1589 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1590 if (real_value > 0.0) { /* keep indices and values */ 1591 temp_constraints++; 1592 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1593 total_counts++; 1594 } 1595 } 1596 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1597 valid_constraints = temp_constraints; 1598 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1599 if (!pcbddc->use_nnsp_true && temp_constraints) { 1600 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1601 1602 #if defined(PETSC_MISSING_LAPACK_GESVD) 1603 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1604 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1605 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1606 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1607 from that computed using LAPACKgesvd 1608 -> This is due to a different computation of eigenvectors in LAPACKheev 1609 -> The quality of the POD-computed basis will be the same */ 1610 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1611 /* Store upper triangular part of correlation matrix */ 1612 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1613 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1614 for (j=0;j<temp_constraints;j++) { 1615 for (k=0;k<j+1;k++) { 1616 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1617 } 1618 } 1619 /* compute eigenvalues and eigenvectors of correlation matrix */ 1620 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1621 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1622 #if !defined(PETSC_USE_COMPLEX) 1623 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1624 #else 1625 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1626 #endif 1627 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1628 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1629 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1630 j=0; 1631 while (j < temp_constraints && singular_vals[j] < tol) j++; 1632 total_counts=total_counts-j; 1633 valid_constraints = temp_constraints-j; 1634 /* scale and copy POD basis into used quadrature memory */ 1635 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1636 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1637 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1638 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1639 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1640 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1641 if (j<temp_constraints) { 1642 PetscInt ii; 1643 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1644 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1645 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1646 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1647 for (k=0;k<temp_constraints-j;k++) { 1648 for (ii=0;ii<size_of_constraint;ii++) { 1649 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1650 } 1651 } 1652 } 1653 #else /* on missing GESVD */ 1654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1655 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1656 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1657 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1658 #if !defined(PETSC_USE_COMPLEX) 1659 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1660 #else 1661 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1662 #endif 1663 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1664 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1665 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1666 k = temp_constraints; 1667 if (k > size_of_constraint) k = size_of_constraint; 1668 j = 0; 1669 while (j < k && singular_vals[k-j-1] < tol) j++; 1670 total_counts = total_counts-temp_constraints+k-j; 1671 valid_constraints = k-j; 1672 #endif /* on missing GESVD */ 1673 } 1674 /* setting change_of_basis flag is safe now */ 1675 if (boolforchange) { 1676 for (j=0;j<valid_constraints;j++) { 1677 PetscBTSet(change_basis,total_counts-j-1); 1678 } 1679 } 1680 } 1681 /* free index sets of faces, edges and vertices */ 1682 for (i=0;i<n_ISForFaces;i++) { 1683 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1684 } 1685 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1686 for (i=0;i<n_ISForEdges;i++) { 1687 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1688 } 1689 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1690 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1691 /* map temp_indices_to_constraint in boundary numbering */ 1692 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1693 if (i != temp_indices[total_counts]) { 1694 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1695 } 1696 1697 /* free workspace */ 1698 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1699 ierr = PetscFree(work);CHKERRQ(ierr); 1700 #if defined(PETSC_USE_COMPLEX) 1701 ierr = PetscFree(rwork);CHKERRQ(ierr); 1702 #endif 1703 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1704 #if defined(PETSC_MISSING_LAPACK_GESVD) 1705 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1706 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1707 #endif 1708 } 1709 for (k=0;k<nnsp_size;k++) { 1710 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1711 } 1712 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1713 1714 /* set quantities in pcbddc data structure and store previous primal size */ 1715 /* n_vertices defines the number of subdomain corners in the primal space */ 1716 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1717 olocal_primal_size = pcbddc->local_primal_size; 1718 pcbddc->local_primal_size = total_counts; 1719 pcbddc->n_vertices = n_vertices; 1720 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1721 1722 /* Create constraint matrix */ 1723 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1724 /* so we need to set it up properly either with or without change of basis */ 1725 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1726 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1727 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1728 /* array to compute a local numbering of constraints : vertices first then constraints */ 1729 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1730 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1731 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1732 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1733 /* auxiliary stuff for basis change */ 1734 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1735 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1736 1737 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1738 total_primal_vertices=0; 1739 for (i=0;i<pcbddc->local_primal_size;i++) { 1740 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1741 if (size_of_constraint == 1) { 1742 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1743 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1744 aux_primal_minloc[total_primal_vertices]=0; 1745 total_primal_vertices++; 1746 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1747 PetscInt min_loc,min_index; 1748 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1749 /* find first untouched local node */ 1750 k = 0; 1751 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1752 min_index = global_indices[k]; 1753 min_loc = k; 1754 /* search the minimum among global nodes already untouched on the cc */ 1755 for (k=1;k<size_of_constraint;k++) { 1756 /* there can be more than one constraint on a single connected component */ 1757 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1758 min_index = global_indices[k]; 1759 min_loc = k; 1760 } 1761 } 1762 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1763 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1764 aux_primal_minloc[total_primal_vertices]=min_loc; 1765 total_primal_vertices++; 1766 } 1767 } 1768 /* determine if a QR strategy is needed for change of basis */ 1769 qr_needed = PETSC_FALSE; 1770 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1771 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1772 if (PetscBTLookup(change_basis,i)) { 1773 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1774 j = 0; 1775 for (k=0;k<size_of_constraint;k++) { 1776 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1777 j++; 1778 } 1779 } 1780 /* found more than one primal dof on the cc */ 1781 if (j > 1) { 1782 PetscBTSet(qr_needed_idx,i); 1783 qr_needed = PETSC_TRUE; 1784 } 1785 } 1786 } 1787 /* free workspace */ 1788 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1789 1790 /* permute indices in order to have a sorted set of vertices */ 1791 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1792 1793 /* nonzero structure of constraint matrix */ 1794 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1795 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1796 j=total_primal_vertices; 1797 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1798 if (!PetscBTLookup(change_basis,i)) { 1799 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1800 j++; 1801 } 1802 } 1803 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1804 ierr = PetscFree(nnz);CHKERRQ(ierr); 1805 /* set values in constraint matrix */ 1806 for (i=0;i<total_primal_vertices;i++) { 1807 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1808 } 1809 total_counts = total_primal_vertices; 1810 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1811 if (!PetscBTLookup(change_basis,i)) { 1812 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1813 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1814 total_counts++; 1815 } 1816 } 1817 /* assembling */ 1818 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1819 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820 /* 1821 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1822 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1823 */ 1824 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1825 if (pcbddc->use_change_of_basis) { 1826 /* dual and primal dofs on a single cc */ 1827 PetscInt dual_dofs,primal_dofs; 1828 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1829 PetscInt primal_counter; 1830 /* working stuff for GEQRF */ 1831 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1832 PetscBLASInt lqr_work; 1833 /* working stuff for UNGQR */ 1834 PetscScalar *gqr_work,lgqr_work_t; 1835 PetscBLASInt lgqr_work; 1836 /* working stuff for TRTRS */ 1837 PetscScalar *trs_rhs; 1838 PetscBLASInt Blas_NRHS; 1839 /* pointers for values insertion into change of basis matrix */ 1840 PetscInt *start_rows,*start_cols; 1841 PetscScalar *start_vals; 1842 /* working stuff for values insertion */ 1843 PetscBT is_primal; 1844 1845 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1846 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1847 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1848 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1849 /* work arrays */ 1850 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1851 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1852 /* nonzeros per row */ 1853 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1854 if (PetscBTLookup(change_basis,i)) { 1855 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1856 if (PetscBTLookup(qr_needed_idx,i)) { 1857 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1858 } else { 1859 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1860 /* get local primal index on the cc */ 1861 j = 0; 1862 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1863 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1864 } 1865 } 1866 } 1867 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1868 ierr = PetscFree(nnz);CHKERRQ(ierr); 1869 /* Set initial identity in the matrix */ 1870 for (i=0;i<pcis->n_B;i++) { 1871 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1872 } 1873 1874 if (pcbddc->dbg_flag) { 1875 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1876 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1877 } 1878 1879 1880 /* Now we loop on the constraints which need a change of basis */ 1881 /* 1882 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1883 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1884 1885 Basic blocks of change of basis matrix T computed 1886 1887 - Using the following block transformation if there is only a primal dof on the cc 1888 (in the example, primal dof is the last one of the edge in LOCAL ordering 1889 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1890 | 1 0 ... 0 1 | 1891 | 0 1 ... 0 1 | 1892 | ... | 1893 | 0 ... 1 1 | 1894 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1895 1896 - via QR decomposition of constraints otherwise 1897 */ 1898 if (qr_needed) { 1899 /* space to store Q */ 1900 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1901 /* first we issue queries for optimal work */ 1902 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1903 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1904 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1905 lqr_work = -1; 1906 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1907 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1908 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1909 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1910 lgqr_work = -1; 1911 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1912 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1913 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1914 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1915 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1916 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1917 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1918 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1919 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1920 /* array to store scaling factors for reflectors */ 1921 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1922 /* array to store rhs and solution of triangular solver */ 1923 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1924 /* allocating workspace for check */ 1925 if (pcbddc->dbg_flag) { 1926 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1927 } 1928 } 1929 /* array to store whether a node is primal or not */ 1930 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1931 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1932 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1933 if (i != total_primal_vertices) { 1934 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1935 } 1936 for (i=0;i<total_primal_vertices;i++) { 1937 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1938 } 1939 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1940 1941 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1942 /* -> using implicit ordering contained in temp_indices data */ 1943 total_counts = pcbddc->n_vertices; 1944 primal_counter = total_counts; 1945 while (total_counts<pcbddc->local_primal_size) { 1946 primal_dofs = 1; 1947 if (PetscBTLookup(change_basis,total_counts)) { 1948 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1949 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1950 primal_dofs++; 1951 } 1952 /* get constraint info */ 1953 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1954 dual_dofs = size_of_constraint-primal_dofs; 1955 1956 if (pcbddc->dbg_flag) { 1957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1958 } 1959 1960 if (primal_dofs > 1) { /* QR */ 1961 1962 /* copy quadrature constraints for change of basis check */ 1963 if (pcbddc->dbg_flag) { 1964 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1965 } 1966 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1967 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1968 1969 /* compute QR decomposition of constraints */ 1970 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1971 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1972 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1973 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1974 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1975 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1976 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1977 1978 /* explictly compute R^-T */ 1979 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1980 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1981 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1982 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1983 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1984 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1985 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1986 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1987 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1988 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1989 1990 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1991 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1993 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1994 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1995 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1996 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1997 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1998 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1999 2000 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2001 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2002 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2003 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2004 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2005 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2006 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2007 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2008 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2009 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2010 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2011 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2012 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2013 2014 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2015 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2016 /* insert cols for primal dofs */ 2017 for (j=0;j<primal_dofs;j++) { 2018 start_vals = &qr_basis[j*size_of_constraint]; 2019 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2020 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2021 } 2022 /* insert cols for dual dofs */ 2023 for (j=0,k=0;j<dual_dofs;k++) { 2024 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2025 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2026 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2027 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2028 j++; 2029 } 2030 } 2031 2032 /* check change of basis */ 2033 if (pcbddc->dbg_flag) { 2034 PetscInt ii,jj; 2035 PetscBool valid_qr=PETSC_TRUE; 2036 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2037 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2039 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2040 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2041 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2042 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2043 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2044 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2045 for (jj=0;jj<size_of_constraint;jj++) { 2046 for (ii=0;ii<primal_dofs;ii++) { 2047 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2048 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2049 } 2050 } 2051 if (!valid_qr) { 2052 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2053 for (jj=0;jj<size_of_constraint;jj++) { 2054 for (ii=0;ii<primal_dofs;ii++) { 2055 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2056 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2057 } 2058 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2059 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2060 } 2061 } 2062 } 2063 } else { 2064 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2065 } 2066 } 2067 } else { /* simple transformation block */ 2068 PetscInt row,col; 2069 PetscScalar val; 2070 for (j=0;j<size_of_constraint;j++) { 2071 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2072 if (!PetscBTLookup(is_primal,row)) { 2073 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2074 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2075 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2076 } else { 2077 for (k=0;k<size_of_constraint;k++) { 2078 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2079 if (row != col) { 2080 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2081 } else { 2082 val = 1.0; 2083 } 2084 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2085 } 2086 } 2087 } 2088 } 2089 /* increment primal counter */ 2090 primal_counter += primal_dofs; 2091 } else { 2092 if (pcbddc->dbg_flag) { 2093 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2094 } 2095 } 2096 /* increment constraint counter total_counts */ 2097 total_counts += primal_dofs; 2098 } 2099 2100 /* free workspace */ 2101 if (qr_needed) { 2102 if (pcbddc->dbg_flag) { 2103 ierr = PetscFree(work);CHKERRQ(ierr); 2104 } 2105 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2106 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2107 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2108 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2109 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2110 } 2111 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2112 /* assembling */ 2113 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2114 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2115 /* 2116 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2117 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2118 */ 2119 } 2120 2121 /* get indices in local ordering for vertices and constraints */ 2122 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2123 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2124 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2125 } 2126 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2127 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2128 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2129 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2130 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2131 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2132 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2133 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2134 /* set quantities in PCBDDC data struct */ 2135 pcbddc->n_actual_vertices = i; 2136 /* check if a new primal space has been introduced */ 2137 pcbddc->new_primal_space_local = PETSC_TRUE; 2138 if (olocal_primal_size == pcbddc->local_primal_size) { 2139 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2140 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2141 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2142 } 2143 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2144 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2145 2146 /* flush dbg viewer */ 2147 if (pcbddc->dbg_flag) { 2148 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2149 } 2150 2151 /* free workspace */ 2152 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2153 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2154 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2155 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2156 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2157 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2158 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2159 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2160 PetscFunctionReturn(0); 2161 } 2162 2163 #undef __FUNCT__ 2164 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2165 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2166 { 2167 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2168 PC_IS *pcis = (PC_IS*)pc->data; 2169 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2170 PetscInt ierr,i,vertex_size; 2171 PetscViewer viewer=pcbddc->dbg_viewer; 2172 2173 PetscFunctionBegin; 2174 /* Reset previously computed graph */ 2175 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2176 /* Init local Graph struct */ 2177 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2178 2179 /* Check validity of the csr graph passed in by the user */ 2180 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2181 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2182 } 2183 2184 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2185 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2186 Mat mat_adj; 2187 const PetscInt *xadj,*adjncy; 2188 PetscBool flg_row=PETSC_TRUE; 2189 2190 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2191 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2192 if (!flg_row) { 2193 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2194 } 2195 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2196 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2197 if (!flg_row) { 2198 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2199 } 2200 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2201 } 2202 2203 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2204 vertex_size = 1; 2205 if (pcbddc->user_provided_isfordofs) { 2206 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2207 ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2208 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2209 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2210 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2211 } 2212 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2213 pcbddc->n_ISForDofs = 0; 2214 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2215 } 2216 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2217 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2218 } else { 2219 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2220 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2221 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2222 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2223 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2224 } 2225 } 2226 } 2227 2228 /* Setup of Graph */ 2229 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2230 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2231 } 2232 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2233 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2234 } 2235 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2236 2237 /* Graph's connected components analysis */ 2238 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2239 2240 /* print some info to stdout */ 2241 if (pcbddc->dbg_flag) { 2242 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2243 } 2244 2245 /* mark topography has done */ 2246 pcbddc->recompute_topography = PETSC_FALSE; 2247 PetscFunctionReturn(0); 2248 } 2249 2250 #undef __FUNCT__ 2251 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2252 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2253 { 2254 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2255 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2256 PetscErrorCode ierr; 2257 2258 PetscFunctionBegin; 2259 n = 0; 2260 vertices = 0; 2261 if (pcbddc->ConstraintMatrix) { 2262 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2263 for (i=0;i<local_primal_size;i++) { 2264 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2265 if (size_of_constraint == 1) n++; 2266 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2267 } 2268 if (vertices_idx) { 2269 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2270 n = 0; 2271 for (i=0;i<local_primal_size;i++) { 2272 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2273 if (size_of_constraint == 1) { 2274 vertices[n++]=row_cmat_indices[0]; 2275 } 2276 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2277 } 2278 } 2279 } 2280 *n_vertices = n; 2281 if (vertices_idx) *vertices_idx = vertices; 2282 PetscFunctionReturn(0); 2283 } 2284 2285 #undef __FUNCT__ 2286 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2287 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2288 { 2289 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2290 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2291 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2292 PetscBT touched; 2293 PetscErrorCode ierr; 2294 2295 /* This function assumes that the number of local constraints per connected component 2296 is not greater than the number of nodes defined for the connected component 2297 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2298 PetscFunctionBegin; 2299 n = 0; 2300 constraints_index = 0; 2301 if (pcbddc->ConstraintMatrix) { 2302 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2303 max_size_of_constraint = 0; 2304 for (i=0;i<local_primal_size;i++) { 2305 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2306 if (size_of_constraint > 1) { 2307 n++; 2308 } 2309 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2310 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2311 } 2312 if (constraints_idx) { 2313 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2314 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2315 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2316 n = 0; 2317 for (i=0;i<local_primal_size;i++) { 2318 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2319 if (size_of_constraint > 1) { 2320 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2321 /* find first untouched local node */ 2322 j = 0; 2323 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2324 min_index = row_cmat_global_indices[j]; 2325 min_loc = j; 2326 /* search the minimum among nodes not yet touched on the connected component 2327 since there can be more than one constraint on a single cc */ 2328 for (j=1;j<size_of_constraint;j++) { 2329 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2330 min_index = row_cmat_global_indices[j]; 2331 min_loc = j; 2332 } 2333 } 2334 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2335 constraints_index[n++] = row_cmat_indices[min_loc]; 2336 } 2337 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2338 } 2339 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2340 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2341 } 2342 } 2343 *n_constraints = n; 2344 if (constraints_idx) *constraints_idx = constraints_index; 2345 PetscFunctionReturn(0); 2346 } 2347 2348 /* the next two functions has been adapted from pcis.c */ 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "PCBDDCApplySchur" 2351 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2352 { 2353 PetscErrorCode ierr; 2354 PC_IS *pcis = (PC_IS*)(pc->data); 2355 2356 PetscFunctionBegin; 2357 if (!vec2_B) { vec2_B = v; } 2358 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2359 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2360 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2361 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2362 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2363 PetscFunctionReturn(0); 2364 } 2365 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2368 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2369 { 2370 PetscErrorCode ierr; 2371 PC_IS *pcis = (PC_IS*)(pc->data); 2372 2373 PetscFunctionBegin; 2374 if (!vec2_B) { vec2_B = v; } 2375 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2376 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2377 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2378 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2379 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "PCBDDCSubsetNumbering" 2385 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2386 { 2387 Vec local_vec,global_vec; 2388 IS seqis,paris; 2389 VecScatter scatter_ctx; 2390 PetscScalar *array; 2391 PetscInt *temp_global_dofs; 2392 PetscScalar globalsum; 2393 PetscInt i,j,s; 2394 PetscInt nlocals,first_index,old_index,max_local; 2395 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2396 PetscMPIInt *dof_sizes,*dof_displs; 2397 PetscBool first_found; 2398 PetscErrorCode ierr; 2399 2400 PetscFunctionBegin; 2401 /* mpi buffers */ 2402 MPI_Comm_size(comm,&size_prec_comm); 2403 MPI_Comm_rank(comm,&rank_prec_comm); 2404 j = ( !rank_prec_comm ? size_prec_comm : 0); 2405 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2406 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2407 /* get maximum size of subset */ 2408 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2409 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2410 max_local = 0; 2411 if (n_local_dofs) { 2412 max_local = temp_global_dofs[0]; 2413 for (i=1;i<n_local_dofs;i++) { 2414 if (max_local < temp_global_dofs[i] ) { 2415 max_local = temp_global_dofs[i]; 2416 } 2417 } 2418 } 2419 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2420 max_global++; 2421 max_local = 0; 2422 if (n_local_dofs) { 2423 max_local = local_dofs[0]; 2424 for (i=1;i<n_local_dofs;i++) { 2425 if (max_local < local_dofs[i] ) { 2426 max_local = local_dofs[i]; 2427 } 2428 } 2429 } 2430 max_local++; 2431 /* allocate workspace */ 2432 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2433 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2434 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2435 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2436 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2437 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2438 /* create scatter */ 2439 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2440 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2441 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2442 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2443 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2444 /* init array */ 2445 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2446 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2447 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2448 if (local_dofs_mult) { 2449 for (i=0;i<n_local_dofs;i++) { 2450 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2451 } 2452 } else { 2453 for (i=0;i<n_local_dofs;i++) { 2454 array[local_dofs[i]]=1.0; 2455 } 2456 } 2457 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2458 /* scatter into global vec and get total number of global dofs */ 2459 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2460 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2461 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2462 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2463 /* Fill global_vec with cumulative function for global numbering */ 2464 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2465 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2466 nlocals = 0; 2467 first_index = -1; 2468 first_found = PETSC_FALSE; 2469 for (i=0;i<s;i++) { 2470 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2471 first_found = PETSC_TRUE; 2472 first_index = i; 2473 } 2474 nlocals += (PetscInt)PetscRealPart(array[i]); 2475 } 2476 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2477 if (!rank_prec_comm) { 2478 dof_displs[0]=0; 2479 for (i=1;i<size_prec_comm;i++) { 2480 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2481 } 2482 } 2483 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2484 if (first_found) { 2485 array[first_index] += (PetscScalar)nlocals; 2486 old_index = first_index; 2487 for (i=first_index+1;i<s;i++) { 2488 if (PetscRealPart(array[i]) > 0.0) { 2489 array[i] += array[old_index]; 2490 old_index = i; 2491 } 2492 } 2493 } 2494 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2495 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2496 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2497 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2498 /* get global ordering of local dofs */ 2499 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2500 if (local_dofs_mult) { 2501 for (i=0;i<n_local_dofs;i++) { 2502 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2503 } 2504 } else { 2505 for (i=0;i<n_local_dofs;i++) { 2506 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2507 } 2508 } 2509 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2510 /* free workspace */ 2511 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2512 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2513 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2514 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2515 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2516 /* return pointer to global ordering of local dofs */ 2517 *global_numbering_subset = temp_global_dofs; 2518 PetscFunctionReturn(0); 2519 } 2520 2521 #undef __FUNCT__ 2522 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2523 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2524 { 2525 PetscInt i,j; 2526 PetscScalar *alphas; 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 /* this implements stabilized Gram-Schmidt */ 2531 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2532 for (i=0;i<n;i++) { 2533 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2534 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2535 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2536 } 2537 ierr = PetscFree(alphas);CHKERRQ(ierr); 2538 PetscFunctionReturn(0); 2539 } 2540 2541 /* TODO 2542 - now preallocation is done assuming SEQDENSE local matrices 2543 */ 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatISGetMPIXAIJ" 2546 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2547 { 2548 Mat new_mat; 2549 Mat_IS *matis = (Mat_IS*)(mat->data); 2550 /* info on mat */ 2551 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2552 PetscInt bs,rows,cols; 2553 PetscInt lrows,lcols; 2554 PetscInt local_rows,local_cols; 2555 PetscBool isdense; 2556 /* values insertion */ 2557 PetscScalar *array; 2558 PetscInt *local_indices,*global_indices; 2559 /* work */ 2560 PetscInt i,j,index_row; 2561 PetscErrorCode ierr; 2562 2563 PetscFunctionBegin; 2564 /* MISSING CHECKS 2565 - rectangular case not covered (it is not allowed by MATIS) 2566 */ 2567 /* get info from mat */ 2568 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2569 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2570 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2571 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2572 2573 /* work */ 2574 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2575 for (i=0;i<local_rows;i++) local_indices[i]=i; 2576 /* map indices of local mat to global values */ 2577 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2578 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2579 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2580 2581 if (reuse==MAT_INITIAL_MATRIX) { 2582 Vec vec_dnz,vec_onz; 2583 PetscScalar *my_dnz,*my_onz; 2584 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2585 PetscInt index_col,owner; 2586 PetscMPIInt nsubdomains; 2587 2588 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2589 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2590 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2591 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2592 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2593 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2594 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2595 2596 /* 2597 preallocation 2598 */ 2599 2600 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2601 /* 2602 Some vectors are needed to sum up properly on shared interface dofs. 2603 Preallocation macros cannot do the job. 2604 Note that preallocation is not exact, since it overestimates nonzeros 2605 */ 2606 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2607 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2608 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2609 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2610 /* All processes need to compute entire row ownership */ 2611 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2612 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2613 for (i=0;i<nsubdomains;i++) { 2614 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2615 row_ownership[j]=i; 2616 } 2617 } 2618 2619 /* 2620 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2621 then, they will be summed up properly. This way, preallocation is always sufficient 2622 */ 2623 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2624 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2625 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2626 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2627 for (i=0;i<local_rows;i++) { 2628 index_row = global_indices[i]; 2629 for (j=i;j<local_rows;j++) { 2630 owner = row_ownership[index_row]; 2631 index_col = global_indices[j]; 2632 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2633 my_dnz[i] += 1.0; 2634 } else { /* offdiag block */ 2635 my_onz[i] += 1.0; 2636 } 2637 /* same as before, interchanging rows and cols */ 2638 if (i != j) { 2639 owner = row_ownership[index_col]; 2640 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2641 my_dnz[j] += 1.0; 2642 } else { 2643 my_onz[j] += 1.0; 2644 } 2645 } 2646 } 2647 } 2648 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2649 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2650 if (local_rows) { /* multilevel guard */ 2651 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2652 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2653 } 2654 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2655 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2656 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2657 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2658 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2659 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2660 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2661 2662 /* set computed preallocation in dnz and onz */ 2663 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2664 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2665 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2666 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2667 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2668 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2669 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2670 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2671 2672 /* Resize preallocation if overestimated */ 2673 for (i=0;i<lrows;i++) { 2674 dnz[i] = PetscMin(dnz[i],lcols); 2675 onz[i] = PetscMin(onz[i],cols-lcols); 2676 } 2677 /* set preallocation */ 2678 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2679 for (i=0;i<lrows/bs;i++) { 2680 dnz[i] = dnz[i*bs]/bs; 2681 onz[i] = onz[i*bs]/bs; 2682 } 2683 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2684 for (i=0;i<lrows/bs;i++) { 2685 dnz[i] = dnz[i]-i; 2686 } 2687 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2688 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2689 *M = new_mat; 2690 } else { 2691 PetscInt mbs,mrows,mcols; 2692 /* some checks */ 2693 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2694 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2695 if (mrows != rows) { 2696 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2697 } 2698 if (mrows != rows) { 2699 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2700 } 2701 if (mbs != bs) { 2702 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2703 } 2704 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2705 } 2706 /* set local to global mappings */ 2707 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2708 /* Set values */ 2709 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2710 if (isdense) { /* special case for dense local matrices */ 2711 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2712 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2713 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2714 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2715 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2716 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2717 } else { /* very basic values insertion for all other matrix types */ 2718 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2719 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2720 for (i=0;i<local_rows;i++) { 2721 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2722 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2723 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2724 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2725 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2726 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2727 } 2728 } 2729 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2730 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2731 if (isdense) { 2732 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2733 } 2734 PetscFunctionReturn(0); 2735 } 2736 2737 #undef __FUNCT__ 2738 #define __FUNCT__ "MatISGetSubassemblingPattern" 2739 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2740 { 2741 Mat subdomain_adj; 2742 IS new_ranks,ranks_send_to; 2743 MatPartitioning partitioner; 2744 Mat_IS *matis; 2745 PetscInt n_neighs,*neighs,*n_shared,**shared; 2746 PetscInt prank; 2747 PetscMPIInt size,rank,color; 2748 PetscInt *xadj,*adjncy,*oldranks; 2749 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2750 PetscInt i,j,n_subdomains,local_size,threshold=0; 2751 PetscErrorCode ierr; 2752 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2753 PetscSubcomm subcomm; 2754 2755 PetscFunctionBegin; 2756 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2757 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2758 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2759 2760 /* Get info on mapping */ 2761 matis = (Mat_IS*)(mat->data); 2762 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2763 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2764 2765 /* build local CSR graph of subdomains' connectivity */ 2766 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2767 xadj[0] = 0; 2768 xadj[1] = PetscMax(n_neighs-1,0); 2769 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2770 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2771 2772 if (threshold) { 2773 PetscInt* count,min_threshold; 2774 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2775 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2776 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2777 for (j=0;j<n_shared[i];j++) { 2778 count[shared[i][j]] += 1; 2779 } 2780 } 2781 /* adapt threshold since we dont want to lose significant connections */ 2782 min_threshold = n_neighs; 2783 for (i=1;i<n_neighs;i++) { 2784 for (j=0;j<n_shared[i];j++) { 2785 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2786 } 2787 } 2788 threshold = PetscMax(min_threshold+1,threshold); 2789 xadj[1] = 0; 2790 for (i=1;i<n_neighs;i++) { 2791 for (j=0;j<n_shared[i];j++) { 2792 if (count[shared[i][j]] < threshold) { 2793 adjncy[xadj[1]] = neighs[i]; 2794 adjncy_wgt[xadj[1]] = n_shared[i]; 2795 xadj[1]++; 2796 break; 2797 } 2798 } 2799 } 2800 ierr = PetscFree(count);CHKERRQ(ierr); 2801 } else { 2802 if (xadj[1]) { 2803 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2804 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2805 } 2806 } 2807 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2808 if (use_square) { 2809 for (i=0;i<xadj[1];i++) { 2810 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2811 } 2812 } 2813 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2814 2815 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2816 2817 /* 2818 Restrict work on active processes only. 2819 */ 2820 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2821 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2822 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2823 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2824 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2825 if (color) { 2826 ierr = PetscFree(xadj);CHKERRQ(ierr); 2827 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2828 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2829 } else { 2830 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2831 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2832 prank = rank; 2833 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2834 /* 2835 for (i=0;i<size;i++) { 2836 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2837 } 2838 */ 2839 for (i=0;i<xadj[1];i++) { 2840 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2841 } 2842 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2843 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2844 n_subdomains = (PetscInt)size; 2845 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2846 2847 /* Partition */ 2848 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2849 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2850 if (use_vwgt) { 2851 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2852 v_wgt[0] = local_size; 2853 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2854 } 2855 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); */ 2856 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2857 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2858 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2859 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2860 2861 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2862 /* ranks_send_to_idx[0] = oldranks[is_indices[0]]; */ /* contiguos set of processes */ 2863 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2864 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2865 /* clean up */ 2866 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2867 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2868 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2869 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2870 } 2871 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2872 2873 /* assemble parallel IS for sends */ 2874 i = 1; 2875 if (color) i=0; 2876 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2877 2878 /* get back IS */ 2879 *is_sends = ranks_send_to; 2880 PetscFunctionReturn(0); 2881 } 2882 2883 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2884 2885 #undef __FUNCT__ 2886 #define __FUNCT__ "MatISSubassemble" 2887 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, MatReuse reuse, Mat *mat_n) 2888 { 2889 Mat local_mat; 2890 Mat_IS *matis; 2891 IS is_sends_internal; 2892 PetscInt rows,cols; 2893 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2894 PetscBool ismatis,isdense; 2895 ISLocalToGlobalMapping l2gmap; 2896 PetscInt* l2gmap_indices; 2897 const PetscInt* is_indices; 2898 MatType new_local_type; 2899 MatTypePrivate new_local_type_private; 2900 /* buffers */ 2901 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2902 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2903 /* MPI */ 2904 MPI_Comm comm; 2905 PetscMPIInt n_sends,n_recvs,commsize; 2906 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2907 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2908 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2909 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2910 PetscErrorCode ierr; 2911 2912 PetscFunctionBegin; 2913 /* checks */ 2914 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2915 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2916 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2917 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2918 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2919 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2920 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2921 if (reuse == MAT_REUSE_MATRIX) { 2922 PetscInt mrows,mcols,mnrows,mncols; 2923 PetscBool ismatis; 2924 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2925 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2926 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2927 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2928 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2929 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2930 } 2931 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2932 PetscValidLogicalCollectiveInt(mat,bs,0); 2933 /* prepare IS for sending if not provided */ 2934 if (!is_sends) { 2935 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2936 ierr = MatISGetSubassemblingPattern(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2937 } else { 2938 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2939 is_sends_internal = is_sends; 2940 } 2941 2942 /* get pointer of MATIS data */ 2943 matis = (Mat_IS*)mat->data; 2944 2945 /* get comm */ 2946 comm = PetscObjectComm((PetscObject)mat); 2947 2948 /* compute number of sends */ 2949 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2950 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2951 2952 /* compute number of receives */ 2953 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2954 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2955 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2956 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2957 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2958 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2959 ierr = PetscFree(iflags);CHKERRQ(ierr); 2960 2961 /* prepare send/receive buffers */ 2962 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2963 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2964 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2965 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2966 2967 /* Get data from local mat */ 2968 if (!isdense) { 2969 /* TODO: See below some guidelines on how to prepare the local buffers */ 2970 /* 2971 send_buffer_vals should contain the raw values of the local matrix 2972 send_buffer_idxs should contain: 2973 - MatType_PRIVATE type 2974 - PetscInt size_of_l2gmap 2975 - PetscInt global_row_indices[size_of_l2gmap] 2976 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2977 */ 2978 } else { 2979 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2980 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2981 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2982 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2983 send_buffer_idxs[1] = i; 2984 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2985 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2986 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2987 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2988 for (i=0;i<n_sends;i++) { 2989 ilengths_vals[is_indices[i]] = len*len; 2990 ilengths_idxs[is_indices[i]] = len+2; 2991 } 2992 } 2993 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2994 buf_size_idxs = 0; 2995 buf_size_vals = 0; 2996 for (i=0;i<n_recvs;i++) { 2997 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2998 buf_size_vals += (PetscInt)olengths_vals[i]; 2999 } 3000 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 3001 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 3002 3003 /* get new tags for clean communications */ 3004 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3005 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3006 3007 /* allocate for requests */ 3008 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 3009 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 3010 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 3011 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 3012 3013 /* communications */ 3014 ptr_idxs = recv_buffer_idxs; 3015 ptr_vals = recv_buffer_vals; 3016 for (i=0;i<n_recvs;i++) { 3017 source_dest = onodes[i]; 3018 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3019 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3020 ptr_idxs += olengths_idxs[i]; 3021 ptr_vals += olengths_vals[i]; 3022 } 3023 for (i=0;i<n_sends;i++) { 3024 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3025 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3026 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3027 } 3028 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3029 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3030 3031 /* assemble new l2g map */ 3032 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3033 ptr_idxs = recv_buffer_idxs; 3034 buf_size_idxs = 0; 3035 for (i=0;i<n_recvs;i++) { 3036 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3037 ptr_idxs += olengths_idxs[i]; 3038 } 3039 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 3040 ptr_idxs = recv_buffer_idxs; 3041 buf_size_idxs = 0; 3042 for (i=0;i<n_recvs;i++) { 3043 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3044 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3045 ptr_idxs += olengths_idxs[i]; 3046 } 3047 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3048 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3049 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3050 3051 /* infer new local matrix type from received local matrices type */ 3052 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3053 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3054 new_local_type_private = MATAIJ_PRIVATE; 3055 new_local_type = MATSEQAIJ; 3056 if (n_recvs) { 3057 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3058 ptr_idxs = recv_buffer_idxs; 3059 for (i=0;i<n_recvs;i++) { 3060 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3061 new_local_type_private = MATAIJ_PRIVATE; 3062 break; 3063 } 3064 ptr_idxs += olengths_idxs[i]; 3065 } 3066 switch (new_local_type_private) { 3067 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 3068 new_local_type = MATSEQAIJ; 3069 bs = 1; 3070 break; 3071 case MATAIJ_PRIVATE: 3072 new_local_type = MATSEQAIJ; 3073 bs = 1; 3074 break; 3075 case MATBAIJ_PRIVATE: 3076 new_local_type = MATSEQBAIJ; 3077 break; 3078 case MATSBAIJ_PRIVATE: 3079 new_local_type = MATSEQSBAIJ; 3080 break; 3081 default: 3082 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3083 break; 3084 } 3085 } 3086 3087 /* create MATIS object if needed */ 3088 if (reuse == MAT_INITIAL_MATRIX) { 3089 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3090 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3091 } else { 3092 /* it also destroys the local matrices */ 3093 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3094 } 3095 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3096 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3097 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3098 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3099 3100 /* set values */ 3101 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3102 ptr_vals = recv_buffer_vals; 3103 ptr_idxs = recv_buffer_idxs; 3104 for (i=0;i<n_recvs;i++) { 3105 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3106 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3107 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3108 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3109 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3110 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3111 } 3112 ptr_idxs += olengths_idxs[i]; 3113 ptr_vals += olengths_vals[i]; 3114 } 3115 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3116 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3117 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3118 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3119 3120 { /* check */ 3121 Vec lvec,rvec; 3122 PetscReal infty_error; 3123 3124 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3125 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3126 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3127 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3128 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3129 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3130 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3131 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3132 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3133 } 3134 3135 /* free workspace */ 3136 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3137 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3138 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3139 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3140 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3141 if (isdense) { 3142 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3143 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3144 } else { 3145 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3146 } 3147 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3148 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3149 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3150 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3151 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3152 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3153 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3154 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3155 ierr = PetscFree(onodes);CHKERRQ(ierr); 3156 PetscFunctionReturn(0); 3157 } 3158 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3161 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3162 { 3163 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3164 PC_IS *pcis = (PC_IS*)pc->data; 3165 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3166 MatNullSpace CoarseNullSpace=NULL; 3167 ISLocalToGlobalMapping coarse_islg; 3168 IS coarse_is; 3169 PetscInt max_it; 3170 PetscInt im_active=-1,active_procs=-1; 3171 PC pc_temp; 3172 PCType coarse_pc_type; 3173 KSPType coarse_ksp_type; 3174 PetscBool multilevel_requested,multilevel_allowed; 3175 PetscBool setsym,issym,isherm,ispreonly,isbddc,isnn,coarse_reuse; 3176 MatStructure matstruct; 3177 PetscErrorCode ierr; 3178 3179 PetscFunctionBegin; 3180 /* Assign global numbering to coarse dofs */ 3181 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3182 PetscInt ocoarse_size; 3183 ocoarse_size = pcbddc->coarse_size; 3184 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3185 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3186 /* see if we can avoid some work */ 3187 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3188 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3189 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3190 coarse_reuse = PETSC_FALSE; 3191 } else { /* we can safely reuse already computed coarse matrix */ 3192 coarse_reuse = PETSC_TRUE; 3193 } 3194 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3195 coarse_reuse = PETSC_FALSE; 3196 } 3197 /* reset any subassembling information */ 3198 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3199 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3200 coarse_reuse = PETSC_TRUE; 3201 } 3202 3203 /* test if we can go multilevel */ 3204 multilevel_allowed = PETSC_FALSE; 3205 multilevel_requested = PETSC_FALSE; 3206 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3207 if (multilevel_requested) { 3208 /* count "active processes" */ 3209 im_active = !!(pcis->n); 3210 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3211 if (active_procs/pcbddc->coarsening_ratio < 2) { 3212 multilevel_allowed = PETSC_FALSE; 3213 } else { 3214 multilevel_allowed = PETSC_TRUE; 3215 } 3216 } 3217 3218 /* set defaults for coarse KSP and PC */ 3219 if (multilevel_allowed) { 3220 coarse_ksp_type = KSPRICHARDSON; 3221 coarse_pc_type = PCBDDC; 3222 } else { 3223 coarse_ksp_type = KSPPREONLY; 3224 coarse_pc_type = PCREDUNDANT; 3225 } 3226 3227 /* create the coarse KSP object only once with defaults */ 3228 if (!pcbddc->coarse_ksp) { 3229 char prefix[256],str_level[3]; 3230 size_t len; 3231 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3232 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3233 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3234 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3235 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3236 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3237 /* prefix */ 3238 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3239 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3240 if (!pcbddc->current_level) { 3241 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3242 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3243 } else { 3244 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3245 if (pcbddc->current_level>1) len -= 2; 3246 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3247 *(prefix+len)='\0'; 3248 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3249 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3250 } 3251 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3252 } 3253 /* allow user customization */ 3254 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3255 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3256 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3257 3258 /* get some info after set from options */ 3259 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3260 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3261 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3262 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3263 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3264 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3265 isbddc = PETSC_FALSE; 3266 } 3267 3268 /* creates temporary l2gmap and IS for coarse indexes */ 3269 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3270 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3271 3272 /* propagate BDDC info to the next level */ 3273 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3274 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3275 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3276 if (isbddc) { /* protects from unneded computations */ 3277 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3278 const PetscInt *idxs; 3279 IS *c_isfordofs,c_isneumann; 3280 ISLocalToGlobalMapping tmap; 3281 3282 /* create map between primal indices (in local representative ordering) and local subdomain numbering */ 3283 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3284 /* allocate space for temporary storage */ 3285 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3286 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3287 /* dofs splitting */ 3288 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&c_isfordofs);CHKERRQ(ierr); 3289 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3290 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3291 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3292 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3293 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3294 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3295 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3296 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&c_isfordofs[i]);CHKERRQ(ierr); 3297 /* ierr = ISView(c_isfordofs[i],0);CHKERRQ(ierr); */ 3298 } 3299 ierr = PCBDDCSetDofsSplitting(pc_temp,pcbddc->n_ISForDofsLocal,c_isfordofs);CHKERRQ(ierr); 3300 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3301 ierr = ISDestroy(&c_isfordofs[i]);CHKERRQ(ierr); 3302 } 3303 ierr = PetscFree(c_isfordofs);CHKERRQ(ierr); 3304 /* neumann boundaries */ 3305 if (pcbddc->NeumannBoundariesLocal) { 3306 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3307 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3308 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3309 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3310 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3311 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3312 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&c_isneumann);CHKERRQ(ierr); 3313 /* ierr = ISView(c_isneumann,0);CHKERRQ(ierr); */ 3314 ierr = PCBDDCSetNeumannBoundaries(pc_temp,c_isneumann);CHKERRQ(ierr); 3315 } 3316 ierr = ISDestroy(&c_isneumann);CHKERRQ(ierr); 3317 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3318 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3319 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3320 } 3321 3322 /* creates temporary MATIS object for coarse matrix */ 3323 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3324 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3325 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3326 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3327 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3329 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3330 3331 /* assemble coarse matrix */ 3332 if (isbddc || isnn) { 3333 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3334 ierr = MatISGetSubassemblingPattern(coarse_mat_is,pcbddc->coarsening_ratio,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3335 if (pcbddc->dbg_flag) { 3336 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3337 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3338 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 3339 } 3340 } 3341 if (coarse_reuse) { 3342 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3343 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3344 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3345 } else { 3346 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3347 } 3348 } else { 3349 if (coarse_reuse) { 3350 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3351 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3352 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3353 } else { 3354 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3355 } 3356 } 3357 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3358 3359 /* create local to global scatters for coarse problem */ 3360 if (pcbddc->new_primal_space) { 3361 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3362 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3363 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3364 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3365 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3366 } 3367 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3368 3369 /* propagate symmetry info to coarse matrix */ 3370 issym = PETSC_FALSE; 3371 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3372 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3373 isherm = PETSC_FALSE; 3374 ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr); 3375 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3376 3377 /* Compute coarse null space (special handling by BDDC only) */ 3378 if (pcbddc->NullSpace) { 3379 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3380 if (isbddc) { 3381 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3382 } else { 3383 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3384 } 3385 } 3386 3387 /* set operators */ 3388 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3389 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3390 3391 /* additional KSP customization */ 3392 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3393 if (max_it < 5) { 3394 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3395 } 3396 3397 /* print some info if requested */ 3398 if (pcbddc->dbg_flag) { 3399 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3400 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3401 if (!multilevel_allowed) { 3402 if (multilevel_requested) { 3403 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3404 } else if (pcbddc->max_levels) { 3405 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3406 } 3407 } 3408 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3409 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3410 } 3411 3412 /* setup coarse ksp */ 3413 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3414 3415 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3416 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3417 if (pcbddc->dbg_flag || !ispreonly) { 3418 KSP check_ksp; 3419 KSPType check_ksp_type; 3420 PC check_pc; 3421 Vec check_vec; 3422 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3423 PetscInt its; 3424 PetscBool compute; 3425 3426 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3427 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3428 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3429 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3430 if (ispreonly) { 3431 check_ksp_type = KSPPREONLY; 3432 compute = PETSC_FALSE; 3433 } else { 3434 check_ksp_type = KSPGMRES; 3435 compute = PETSC_TRUE; 3436 } 3437 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3438 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3439 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3440 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3441 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3442 /* create random vec */ 3443 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3444 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3445 if (CoarseNullSpace) { 3446 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3447 } 3448 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3449 /* solve coarse problem */ 3450 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3451 if (CoarseNullSpace) { 3452 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3453 } 3454 /* set eigenvalue estimation if preonly has not been requested */ 3455 if (!ispreonly) { 3456 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3457 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Coarse problem eigenvalues: %1.6e %1.6e\n",lambda_min,lambda_max);CHKERRQ(ierr); */ 3458 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3459 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3460 } 3461 /* check coarse problem residual error */ 3462 if (pcbddc->dbg_flag) { 3463 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3464 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3465 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3466 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3467 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3468 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3469 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3470 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3471 if (!ispreonly) { 3472 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3473 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3474 } 3475 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3476 } 3477 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3478 } 3479 3480 /* print additional info */ 3481 if (pcbddc->dbg_flag) { 3482 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3483 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3484 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3485 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3486 } 3487 3488 /* free memory */ 3489 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3490 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3491 PetscFunctionReturn(0); 3492 } 3493 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3496 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3497 { 3498 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3499 PC_IS* pcis = (PC_IS*)pc->data; 3500 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3501 PetscInt i,coarse_size; 3502 PetscInt *local_primal_indices; 3503 PetscErrorCode ierr; 3504 3505 PetscFunctionBegin; 3506 /* Compute global number of coarse dofs */ 3507 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3508 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3509 } 3510 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3511 3512 /* check numbering */ 3513 if (pcbddc->dbg_flag) { 3514 PetscScalar coarsesum,*array; 3515 3516 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3517 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3519 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3520 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3521 for (i=0;i<pcbddc->local_primal_size;i++) { 3522 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3523 } 3524 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3525 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3526 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3527 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3528 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3529 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3530 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3531 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3532 for (i=0;i<pcis->n;i++) { 3533 if (array[i] == 1.0) { 3534 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3535 } 3536 } 3537 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3538 for (i=0;i<pcis->n;i++) { 3539 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3540 } 3541 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3542 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3543 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3544 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3545 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3546 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3547 if (pcbddc->dbg_flag > 1) { 3548 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3549 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3550 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3551 for (i=0;i<pcbddc->local_primal_size;i++) { 3552 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3553 } 3554 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3555 } 3556 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3557 } 3558 /* get back data */ 3559 *coarse_size_n = coarse_size; 3560 *local_primal_indices_n = local_primal_indices; 3561 PetscFunctionReturn(0); 3562 } 3563 3564 #undef __FUNCT__ 3565 #define __FUNCT__ "PCBDDCGlobalToLocal" 3566 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3567 { 3568 PC_IS* pcis = (PC_IS*)pc->data; 3569 IS localis_t; 3570 PetscInt i,lsize,*idxs; 3571 PetscScalar *vals; 3572 PetscErrorCode ierr; 3573 3574 PetscFunctionBegin; 3575 /* get dirichlet indices in local ordering exploiting local to global map */ 3576 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3577 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3578 for (i=0;i<lsize;i++) vals[i] = 1.0; 3579 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3580 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3581 if (idxs) { /* multilevel guard */ 3582 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3583 } 3584 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3585 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3586 ierr = PetscFree(vals);CHKERRQ(ierr); 3587 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3588 /* now compute dirichlet set in local ordering */ 3589 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3590 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3591 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3592 for (i=0,lsize=0;i<pcis->n;i++) { 3593 if (vals[i] == 1.0) { 3594 lsize++; 3595 } 3596 } 3597 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3598 for (i=0,lsize=0;i<pcis->n;i++) { 3599 if (vals[i] == 1.0) { 3600 idxs[lsize++] = i; 3601 } 3602 } 3603 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3604 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3605 *localis = localis_t; 3606 PetscFunctionReturn(0); 3607 } 3608