1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 104 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 117 { 118 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 119 PC_IS *pcis = (PC_IS*)pc->data; 120 VecType impVecType; 121 PetscInt n_constraints,n_R,old_size; 122 PetscErrorCode ierr; 123 124 PetscFunctionBegin; 125 if (!pcbddc->ConstraintMatrix) { 126 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 127 } 128 /* get sizes */ 129 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 130 n_R = pcis->n-pcbddc->n_actual_vertices; 131 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 132 /* local work vectors (try to avoid unneeded work)*/ 133 /* R nodes */ 134 old_size = -1; 135 if (pcbddc->vec1_R) { 136 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 137 } 138 if (n_R != old_size) { 139 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 140 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 141 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 143 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 144 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 145 } 146 /* local primal dofs */ 147 old_size = -1; 148 if (pcbddc->vec1_P) { 149 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 150 } 151 if (pcbddc->local_primal_size != old_size) { 152 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 153 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 155 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 156 } 157 /* local explicit constraints */ 158 old_size = -1; 159 if (pcbddc->vec1_C) { 160 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 161 } 162 if (n_constraints && n_constraints != old_size) { 163 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 164 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 166 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNCT__ 172 #define __FUNCT__ "PCBDDCSetUpCorrection" 173 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 174 { 175 PetscErrorCode ierr; 176 /* pointers to pcis and pcbddc */ 177 PC_IS* pcis = (PC_IS*)pc->data; 178 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 179 /* submatrices of local problem */ 180 Mat A_RV,A_VR,A_VV; 181 /* working matrices */ 182 Mat M1,M2,M3,C_CR; 183 /* working vectors */ 184 Vec vec1_C,vec2_C,vec1_V,vec2_V; 185 /* additional working stuff */ 186 IS is_aux; 187 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 188 const PetscScalar *array,*row_cmat_values; 189 const PetscInt *row_cmat_indices,*idx_R_local; 190 PetscInt *idx_V_B,*auxindices; 191 PetscInt n_vertices,n_constraints,size_of_constraint; 192 PetscInt i,j,n_R,n_D,n_B; 193 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 194 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 195 MatType impMatType; 196 /* some shortcuts to scalars */ 197 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 198 /* for debugging purposes */ 199 PetscReal *coarsefunctions_errors,*constraints_errors; 200 201 PetscFunctionBegin; 202 /* get number of vertices (corners plus constraints with change of basis) 203 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 204 n_vertices = pcbddc->n_actual_vertices; 205 n_constraints = pcbddc->local_primal_size-n_vertices; 206 /* Set Non-overlapping dimensions */ 207 n_B = pcis->n_B; n_D = pcis->n - n_B; 208 n_R = pcis->n-n_vertices; 209 210 /* Set types for local objects needed by BDDC precondtioner */ 211 impMatType = MATSEQDENSE; 212 213 /* Allocating some extra storage just to be safe */ 214 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 215 for (i=0;i<pcis->n;i++) auxindices[i]=i; 216 217 /* vertices in boundary numbering */ 218 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 219 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 220 if (i != n_vertices) { 221 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 222 } 223 224 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 225 if (n_constraints) { 226 /* see if we can save some allocations */ 227 if (pcbddc->local_auxmat2) { 228 PetscInt on_R,on_constraints; 229 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 230 if (on_R != n_R || on_constraints != n_constraints) { 231 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 232 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 233 } 234 } 235 /* work vectors */ 236 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 238 /* auxiliary matrices */ 239 if (!pcbddc->local_auxmat2) { 240 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 241 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 242 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 243 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 244 } 245 246 /* Extract constraints on R nodes: C_{CR} */ 247 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 248 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 249 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 250 251 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 252 for (i=0;i<n_constraints;i++) { 253 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 254 /* Get row of constraint matrix in R numbering */ 255 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 256 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 257 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 259 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 260 /* Solve for row of constraint matrix in R numbering */ 261 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 262 /* Set values in local_auxmat2 */ 263 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 264 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 265 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 } 267 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 268 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 270 271 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 272 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 273 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 274 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 275 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 276 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 277 ierr = MatSetUp(M1);CHKERRQ(ierr); 278 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 279 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 280 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 281 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 282 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 283 ierr = MatDestroy(&M2);CHKERRQ(ierr); 284 ierr = MatDestroy(&M3);CHKERRQ(ierr); 285 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 286 if (!pcbddc->local_auxmat1) { 287 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 288 } else { 289 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } 291 } 292 293 /* Get submatrices from subdomain matrix */ 294 if (n_vertices) { 295 PetscInt ibs,mbs; 296 PetscBool issbaij; 297 Mat newmat; 298 299 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 300 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 301 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 302 if (ibs != mbs) { /* need to convert to SEQAIJ */ 303 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 304 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 307 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 308 } else { 309 /* this is safe */ 310 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 311 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 312 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 313 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 314 /* which of the two approaches is faster? */ 315 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 316 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 317 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 319 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 320 } else { 321 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 322 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 } 324 } 325 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 326 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 327 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 328 } 329 330 /* Matrix of coarse basis functions (local) */ 331 if (pcbddc->coarse_phi_B) { 332 PetscInt on_B,on_primal; 333 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 334 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 335 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 336 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 337 } 338 } 339 if (pcbddc->coarse_phi_D) { 340 PetscInt on_D,on_primal; 341 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 342 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 343 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 344 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 345 } 346 } 347 if (!pcbddc->coarse_phi_B) { 348 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 349 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 350 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 351 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 352 } 353 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 354 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 355 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 356 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 357 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 358 } 359 360 if (pcbddc->dbg_flag) { 361 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 362 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 363 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 364 } 365 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 366 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 367 368 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 369 370 /* vertices */ 371 for (i=0;i<n_vertices;i++) { 372 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 373 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 374 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 375 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 376 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 378 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 379 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 380 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 382 if (n_constraints) { 383 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 384 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 386 } 387 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 388 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 389 390 /* Set values in coarse basis function and subdomain part of coarse_mat */ 391 /* coarse basis functions */ 392 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 393 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 394 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 396 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 397 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 399 if (pcbddc->switch_static || pcbddc->dbg_flag) { 400 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 401 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 403 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 } 406 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 407 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 408 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 if (n_constraints) { 411 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 412 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 } 415 416 /* check */ 417 if (pcbddc->dbg_flag) { 418 /* assemble subdomain vector on local nodes */ 419 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 420 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 421 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 425 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 426 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 427 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 428 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 429 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 430 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 if (n_constraints) { 432 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 433 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 } 436 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 439 /* check saddle point solution */ 440 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 441 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 443 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 444 /* shift by the identity matrix */ 445 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 449 } 450 } 451 452 /* constraints */ 453 for (i=0;i<n_constraints;i++) { 454 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 455 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 456 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 457 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 458 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 459 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 460 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 461 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 462 if (n_vertices) { 463 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 464 } 465 /* Set values in coarse basis function and subdomain part of coarse_mat */ 466 /* coarse basis functions */ 467 j = i+n_vertices; /* don't touch this! */ 468 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 469 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 472 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 473 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 if (pcbddc->switch_static || pcbddc->dbg_flag) { 475 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 478 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 479 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 } 481 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 482 if (n_vertices) { 483 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 484 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 485 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 } 487 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 488 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 491 if (pcbddc->dbg_flag) { 492 /* assemble subdomain vector on nodes */ 493 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 494 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 495 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 499 /* assemble subdomain vector of lagrange multipliers */ 500 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 501 if (n_vertices) { 502 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 503 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 504 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 } 506 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 507 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 510 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 512 /* check saddle point solution */ 513 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 514 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 516 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 517 /* shift by the identity matrix */ 518 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 519 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 522 } 523 } 524 /* call assembling routines for local coarse basis */ 525 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 526 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 if (pcbddc->switch_static || pcbddc->dbg_flag) { 528 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 } 531 532 /* compute other basis functions for non-symmetric problems */ 533 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 534 if (!setsym || (setsym && !issym)) { 535 if (!pcbddc->coarse_psi_B) { 536 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 537 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 539 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 540 } 541 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 542 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 543 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 544 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 545 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 546 } 547 for (i=0;i<pcbddc->local_primal_size;i++) { 548 if (n_constraints) { 549 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 550 for (j=0;j<n_constraints;j++) { 551 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 552 } 553 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 554 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 555 } 556 if (i<n_vertices) { 557 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 558 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 559 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 560 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 561 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 562 if (n_constraints) { 563 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 564 } 565 } else { 566 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 567 } 568 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 569 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 570 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 573 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 574 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 if (i<n_vertices) { 576 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 577 } 578 if (pcbddc->switch_static || pcbddc->dbg_flag) { 579 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 580 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 582 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 583 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 } 585 586 if (pcbddc->dbg_flag) { 587 /* assemble subdomain vector on nodes */ 588 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 589 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 590 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 591 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 if (i<n_vertices) { 593 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 594 } 595 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 596 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 597 /* assemble subdomain vector of lagrange multipliers */ 598 for (j=0;j<pcbddc->local_primal_size;j++) { 599 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 600 } 601 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 602 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 603 /* check saddle point solution */ 604 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 605 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 607 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 608 /* shift by the identity matrix */ 609 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 610 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 611 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 613 } 614 } 615 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 616 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 if (pcbddc->switch_static || pcbddc->dbg_flag) { 618 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 } 621 unsymmetric_check = PETSC_TRUE; 622 } else { /* take references to already computed coarse basis */ 623 unsymmetric_check = PETSC_FALSE; 624 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 625 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 626 if (pcbddc->coarse_phi_D) { 627 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 628 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 629 } 630 } 631 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 632 /* Checking coarse_sub_mat and coarse basis functios */ 633 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 634 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 635 if (pcbddc->dbg_flag) { 636 Mat coarse_sub_mat; 637 Mat AUXMAT,TM1,TM2,TM3,TM4; 638 Mat coarse_phi_D,coarse_phi_B; 639 Mat coarse_psi_D,coarse_psi_B; 640 Mat A_II,A_BB,A_IB,A_BI; 641 MatType checkmattype=MATSEQAIJ; 642 PetscReal real_value; 643 644 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 645 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 646 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 648 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 649 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 650 if (unsymmetric_check) { 651 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 652 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 653 } 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 655 656 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 658 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 659 if (unsymmetric_check) { 660 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 661 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 662 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 663 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 664 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 665 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 666 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 667 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 668 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 669 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 670 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 672 } else { 673 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 674 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 675 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 676 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 677 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 678 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 679 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 680 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 681 } 682 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 683 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 684 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 686 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 687 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 688 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 693 for (i=0;i<pcbddc->local_primal_size;i++) { 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 695 } 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 697 for (i=0;i<pcbddc->local_primal_size;i++) { 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 699 } 700 if (unsymmetric_check) { 701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 702 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 704 } 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 706 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 708 } 709 } 710 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 711 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 712 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 715 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 716 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 719 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 720 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 721 if (unsymmetric_check) { 722 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 723 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 724 } 725 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 726 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 727 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 728 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 729 } 730 /* free memory */ 731 if (n_vertices) { 732 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 733 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 734 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 735 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 737 } 738 if (n_constraints) { 739 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 740 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 741 ierr = MatDestroy(&M1);CHKERRQ(ierr); 742 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 743 } 744 ierr = PetscFree(auxindices);CHKERRQ(ierr); 745 /* get back data */ 746 *coarse_submat_vals_n = coarse_submat_vals; 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 752 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 753 { 754 PC_IS* pcis = (PC_IS*)(pc->data); 755 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 756 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 757 PetscBool issbaij,isseqaij; 758 /* manage repeated solves */ 759 MatReuse reuse; 760 MatStructure matstruct; 761 PetscErrorCode ierr; 762 763 PetscFunctionBegin; 764 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 765 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 766 } 767 /* get mat flags */ 768 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 769 reuse = MAT_INITIAL_MATRIX; 770 if (pc->setupcalled) { 771 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 772 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 773 if (matstruct == SAME_NONZERO_PATTERN) { 774 reuse = MAT_REUSE_MATRIX; 775 } else { 776 reuse = MAT_INITIAL_MATRIX; 777 } 778 } 779 if (reuse == MAT_INITIAL_MATRIX) { 780 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 783 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 784 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 785 } 786 787 /* transform local matrices if needed */ 788 if (pcbddc->use_change_of_basis) { 789 Mat change_mat_all; 790 PetscScalar *row_cmat_values; 791 PetscInt *row_cmat_indices; 792 PetscInt *nnz,*is_indices,*temp_indices; 793 PetscInt i,j,k,n_D,n_B; 794 795 /* Get Non-overlapping dimensions */ 796 n_B = pcis->n_B; 797 n_D = pcis->n-n_B; 798 799 /* compute nonzero structure of change of basis on all local nodes */ 800 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 801 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 803 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 804 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 805 k=1; 806 for (i=0;i<n_B;i++) { 807 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 808 nnz[is_indices[i]]=j; 809 if (k < j) k = j; 810 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 811 } 812 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 813 /* assemble change of basis matrix on the whole set of local dofs */ 814 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 815 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 816 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 817 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 818 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 819 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 820 for (i=0;i<n_D;i++) { 821 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 822 } 823 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 824 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 825 for (i=0;i<n_B;i++) { 826 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 827 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 828 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 829 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 830 } 831 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 832 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 833 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 834 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 835 if (isseqaij) { 836 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 837 } else { 838 Mat work_mat; 839 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 840 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 841 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 842 } 843 /* 844 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 845 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 846 */ 847 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 848 ierr = PetscFree(nnz);CHKERRQ(ierr); 849 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 850 } else { 851 /* without change of basis, the local matrix is unchanged */ 852 if (!pcbddc->local_mat) { 853 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 854 pcbddc->local_mat = matis->A; 855 } 856 } 857 858 /* get submatrices */ 859 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 860 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 861 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 862 if (!issbaij) { 863 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 864 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 865 } else { 866 Mat newmat; 867 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 868 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 869 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 870 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 871 } 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 877 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 878 { 879 PC_IS* pcis = (PC_IS*)(pc->data); 880 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 881 IS is_aux1,is_aux2; 882 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 883 PetscInt n_vertices,i,j,n_R,n_D,n_B; 884 PetscInt vbs,bs; 885 PetscBT bitmask; 886 PetscErrorCode ierr; 887 888 PetscFunctionBegin; 889 /* 890 No need to setup local scatters if 891 - primal space is unchanged 892 AND 893 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 894 AND 895 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 896 */ 897 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 898 PetscFunctionReturn(0); 899 } 900 /* destroy old objects */ 901 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 902 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 903 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 904 /* Set Non-overlapping dimensions */ 905 n_B = pcis->n_B; n_D = pcis->n - n_B; 906 n_vertices = pcbddc->n_actual_vertices; 907 /* create auxiliary bitmask */ 908 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 909 for (i=0;i<n_vertices;i++) { 910 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 911 } 912 913 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 914 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 915 for (i=0, n_R=0; i<pcis->n; i++) { 916 if (!PetscBTLookup(bitmask,i)) { 917 idx_R_local[n_R] = i; 918 n_R++; 919 } 920 } 921 922 /* Block code */ 923 vbs = 1; 924 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 925 if (bs>1 && !(n_vertices%bs)) { 926 PetscBool is_blocked = PETSC_TRUE; 927 PetscInt *vary; 928 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 929 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 930 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 931 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 932 for (i=0; i<n_vertices; i++) { 933 if (vary[i]!=0 && vary[i]!=bs) { 934 is_blocked = PETSC_FALSE; 935 break; 936 } 937 } 938 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 939 vbs = bs; 940 for (i=0;i<n_R/vbs;i++) { 941 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 942 } 943 } 944 ierr = PetscFree(vary);CHKERRQ(ierr); 945 } 946 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 947 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 948 949 /* print some info if requested */ 950 if (pcbddc->dbg_flag) { 951 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 952 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 953 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 958 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 959 } 960 961 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 962 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 963 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 964 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 965 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 966 for (i=0; i<n_D; i++) { 967 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 968 } 969 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 970 for (i=0, j=0; i<n_R; i++) { 971 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 972 aux_array1[j++] = i; 973 } 974 } 975 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 976 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 977 for (i=0, j=0; i<n_B; i++) { 978 if (!PetscBTLookup(bitmask,is_indices[i])) { 979 aux_array2[j++] = i; 980 } 981 } 982 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 983 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 984 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 985 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 986 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 987 988 if (pcbddc->switch_static || pcbddc->dbg_flag) { 989 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 990 for (i=0, j=0; i<n_R; i++) { 991 if (PetscBTLookup(bitmask,idx_R_local[i])) { 992 aux_array1[j++] = i; 993 } 994 } 995 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 996 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 997 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 998 } 999 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1000 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1007 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1008 { 1009 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1010 PC_IS *pcis = (PC_IS*)pc->data; 1011 PC pc_temp; 1012 Mat A_RR; 1013 MatStructure matstruct; 1014 MatReuse reuse; 1015 PetscScalar m_one = -1.0; 1016 PetscReal value; 1017 PetscInt n_D,n_R,ibs,mbs; 1018 PetscBool use_exact,use_exact_reduced,issbaij; 1019 PetscErrorCode ierr; 1020 /* prefixes stuff */ 1021 char dir_prefix[256],neu_prefix[256],str_level[3]; 1022 size_t len; 1023 1024 PetscFunctionBegin; 1025 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1026 1027 /* compute prefixes */ 1028 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1029 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1030 if (!pcbddc->current_level) { 1031 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1032 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1033 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1034 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1035 } else { 1036 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1037 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1038 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1039 len -= 15; /* remove "pc_bddc_coarse_" */ 1040 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1041 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1042 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1043 *(dir_prefix+len)='\0'; 1044 *(neu_prefix+len)='\0'; 1045 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1046 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1047 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1048 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1049 } 1050 1051 /* DIRICHLET PROBLEM */ 1052 /* Matrix for Dirichlet problem is pcis->A_II */ 1053 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1054 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1055 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1056 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1057 /* default */ 1058 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1059 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1060 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1061 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1062 if (issbaij) { 1063 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1064 } else { 1065 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1066 } 1067 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1068 } 1069 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1070 /* Allow user's customization */ 1071 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1072 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1073 if (!n_D) { 1074 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1075 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1076 } 1077 /* Set Up KSP for Dirichlet problem of BDDC */ 1078 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1079 /* set ksp_D into pcis data */ 1080 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1081 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1082 pcis->ksp_D = pcbddc->ksp_D; 1083 1084 /* NEUMANN PROBLEM */ 1085 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1086 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1087 if (pcbddc->ksp_R) { /* already created ksp */ 1088 PetscInt nn_R; 1089 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1090 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1091 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1092 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1093 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1094 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1095 reuse = MAT_INITIAL_MATRIX; 1096 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1097 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1098 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1099 reuse = MAT_INITIAL_MATRIX; 1100 } else { /* safe to reuse the matrix */ 1101 reuse = MAT_REUSE_MATRIX; 1102 } 1103 } 1104 /* last check */ 1105 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1106 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1107 reuse = MAT_INITIAL_MATRIX; 1108 } 1109 } else { /* first time, so we need to create the matrix */ 1110 reuse = MAT_INITIAL_MATRIX; 1111 } 1112 /* extract A_RR */ 1113 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1114 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1115 if (ibs != mbs) { 1116 Mat newmat; 1117 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1118 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1119 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1120 } else { 1121 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1122 } 1123 if (!pcbddc->ksp_R) { /* create object if not present */ 1124 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1125 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1126 /* default */ 1127 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1128 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1129 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1130 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1131 if (issbaij) { 1132 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1133 } else { 1134 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1135 } 1136 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1137 } 1138 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1139 /* Allow user's customization */ 1140 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1141 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1142 if (!n_R) { 1143 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1144 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1145 } 1146 /* Set Up KSP for Neumann problem of BDDC */ 1147 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1148 1149 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1150 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1151 /* Dirichlet */ 1152 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1153 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1154 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1155 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1156 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1157 /* need to be adapted? */ 1158 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1159 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1160 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1161 /* print info */ 1162 if (pcbddc->dbg_flag) { 1163 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1164 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1166 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1167 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1168 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1169 } 1170 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1171 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1172 } 1173 1174 /* Neumann */ 1175 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1176 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1177 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1178 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1179 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1180 /* need to be adapted? */ 1181 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1182 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1183 /* print info */ 1184 if (pcbddc->dbg_flag) { 1185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1186 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1187 } 1188 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1189 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1190 } 1191 } 1192 /* free Neumann problem's matrix */ 1193 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1199 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1200 { 1201 PetscErrorCode ierr; 1202 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1203 1204 PetscFunctionBegin; 1205 if (applytranspose) { 1206 if (pcbddc->local_auxmat1) { 1207 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1208 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1209 } 1210 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1211 } else { 1212 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1213 if (pcbddc->local_auxmat1) { 1214 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1215 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1216 } 1217 } 1218 PetscFunctionReturn(0); 1219 } 1220 1221 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1224 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1225 { 1226 PetscErrorCode ierr; 1227 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1228 PC_IS* pcis = (PC_IS*) (pc->data); 1229 const PetscScalar zero = 0.0; 1230 1231 PetscFunctionBegin; 1232 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1233 if (applytranspose) { 1234 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1235 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1236 } else { 1237 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1238 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1239 } 1240 /* Scatter data of coarse_rhs */ 1241 ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 1242 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1243 1244 /* Local solution on R nodes */ 1245 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1246 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1247 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1248 if (pcbddc->switch_static) { 1249 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1250 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1251 } 1252 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1253 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1254 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1255 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1256 if (pcbddc->switch_static) { 1257 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1258 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1259 } 1260 1261 /* Coarse solution */ 1262 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 /* TODO remove null space when doing multilevel */ 1264 if (applytranspose) { 1265 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1266 } else { 1267 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1268 } 1269 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1270 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1271 1272 /* Sum contributions from two levels */ 1273 if (applytranspose) { 1274 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1275 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1276 } else { 1277 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1278 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1285 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1286 { 1287 PetscErrorCode ierr; 1288 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1289 1290 PetscFunctionBegin; 1291 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #undef __FUNCT__ 1296 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1297 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1298 { 1299 PetscErrorCode ierr; 1300 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1301 1302 PetscFunctionBegin; 1303 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 /* uncomment for testing purposes */ 1308 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1311 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1312 { 1313 PetscErrorCode ierr; 1314 PC_IS* pcis = (PC_IS*)(pc->data); 1315 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1316 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1317 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1318 MatType impMatType=MATSEQAIJ; 1319 /* one and zero */ 1320 PetscScalar one=1.0,zero=0.0; 1321 /* space to store constraints and their local indices */ 1322 PetscScalar *temp_quadrature_constraint; 1323 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1324 /* iterators */ 1325 PetscInt i,j,k,total_counts,temp_start_ptr; 1326 /* stuff to store connected components stored in pcbddc->mat_graph */ 1327 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1328 PetscInt n_ISForFaces,n_ISForEdges; 1329 /* near null space stuff */ 1330 MatNullSpace nearnullsp; 1331 const Vec *nearnullvecs; 1332 Vec *localnearnullsp; 1333 PetscBool nnsp_has_cnst; 1334 PetscInt nnsp_size; 1335 PetscScalar *array; 1336 /* BLAS integers */ 1337 PetscBLASInt lwork,lierr; 1338 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1339 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1340 /* LAPACK working arrays for SVD or POD */ 1341 PetscBool skip_lapack; 1342 PetscScalar *work; 1343 PetscReal *singular_vals; 1344 #if defined(PETSC_USE_COMPLEX) 1345 PetscReal *rwork; 1346 #endif 1347 #if defined(PETSC_MISSING_LAPACK_GESVD) 1348 PetscBLASInt Blas_one_2=1; 1349 PetscScalar *temp_basis,*correlation_mat; 1350 #else 1351 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1352 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1353 #endif 1354 /* reuse */ 1355 PetscInt olocal_primal_size; 1356 PetscInt *oprimal_indices_local_idxs; 1357 /* change of basis */ 1358 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1359 PetscBool boolforchange,qr_needed; 1360 PetscBT touched,change_basis,qr_needed_idx; 1361 /* auxiliary stuff */ 1362 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1363 /* some quantities */ 1364 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1365 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1366 1367 1368 PetscFunctionBegin; 1369 /* Destroy Mat objects computed previously */ 1370 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1371 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1372 /* Get index sets for faces, edges and vertices from graph */ 1373 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1374 pcbddc->use_vertices = PETSC_TRUE; 1375 } 1376 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1377 /* HACK: provide functions to set change of basis */ 1378 if (!ISForVertices && pcbddc->NullSpace) { 1379 pcbddc->use_change_of_basis = PETSC_TRUE; 1380 pcbddc->use_change_on_faces = PETSC_FALSE; 1381 } 1382 /* print some info */ 1383 if (pcbddc->dbg_flag) { 1384 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1385 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1386 i = 0; 1387 if (ISForVertices) { 1388 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1389 } 1390 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1391 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1392 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1393 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1394 } 1395 /* check if near null space is attached to global mat */ 1396 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1397 if (nearnullsp) { 1398 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1399 /* remove any stored info */ 1400 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1401 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1402 /* store information for BDDC solver reuse */ 1403 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1404 pcbddc->onearnullspace = nearnullsp; 1405 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1406 for (i=0;i<nnsp_size;i++) { 1407 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1408 } 1409 } else { /* if near null space is not provided BDDC uses constants by default */ 1410 nnsp_size = 0; 1411 nnsp_has_cnst = PETSC_TRUE; 1412 } 1413 /* get max number of constraints on a single cc */ 1414 max_constraints = nnsp_size; 1415 if (nnsp_has_cnst) max_constraints++; 1416 1417 /* 1418 Evaluate maximum storage size needed by the procedure 1419 - temp_indices will contain start index of each constraint stored as follows 1420 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1421 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1422 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1423 */ 1424 total_counts = n_ISForFaces+n_ISForEdges; 1425 total_counts *= max_constraints; 1426 n_vertices = 0; 1427 if (ISForVertices) { 1428 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1429 } 1430 total_counts += n_vertices; 1431 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1432 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1433 total_counts = 0; 1434 max_size_of_constraint = 0; 1435 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1436 if (i<n_ISForEdges) { 1437 used_IS = &ISForEdges[i]; 1438 } else { 1439 used_IS = &ISForFaces[i-n_ISForEdges]; 1440 } 1441 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1442 total_counts += j; 1443 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1444 } 1445 total_counts *= max_constraints; 1446 total_counts += n_vertices; 1447 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1448 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1449 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1450 /* get local part of global near null space vectors */ 1451 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1452 for (k=0;k<nnsp_size;k++) { 1453 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1454 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1455 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1456 } 1457 1458 /* whether or not to skip lapack calls */ 1459 skip_lapack = PETSC_TRUE; 1460 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1461 1462 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1463 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1464 PetscScalar temp_work; 1465 #if defined(PETSC_MISSING_LAPACK_GESVD) 1466 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1467 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1468 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1469 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1470 #if defined(PETSC_USE_COMPLEX) 1471 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1472 #endif 1473 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1474 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1475 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1476 lwork = -1; 1477 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1478 #if !defined(PETSC_USE_COMPLEX) 1479 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1480 #else 1481 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1482 #endif 1483 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1484 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1485 #else /* on missing GESVD */ 1486 /* SVD */ 1487 PetscInt max_n,min_n; 1488 max_n = max_size_of_constraint; 1489 min_n = max_constraints; 1490 if (max_size_of_constraint < max_constraints) { 1491 min_n = max_size_of_constraint; 1492 max_n = max_constraints; 1493 } 1494 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1495 #if defined(PETSC_USE_COMPLEX) 1496 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1497 #endif 1498 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1499 lwork = -1; 1500 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1501 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1502 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1503 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1504 #if !defined(PETSC_USE_COMPLEX) 1505 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1506 #else 1507 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1508 #endif 1509 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1510 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1511 #endif /* on missing GESVD */ 1512 /* Allocate optimal workspace */ 1513 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1514 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1515 } 1516 /* Now we can loop on constraining sets */ 1517 total_counts = 0; 1518 temp_indices[0] = 0; 1519 /* vertices */ 1520 if (ISForVertices) { 1521 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1522 if (nnsp_has_cnst) { /* consider all vertices */ 1523 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1524 for (i=0;i<n_vertices;i++) { 1525 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1526 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1527 total_counts++; 1528 } 1529 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1530 PetscBool used_vertex; 1531 for (i=0;i<n_vertices;i++) { 1532 used_vertex = PETSC_FALSE; 1533 k = 0; 1534 while (!used_vertex && k<nnsp_size) { 1535 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1536 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1537 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1538 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1539 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1540 total_counts++; 1541 used_vertex = PETSC_TRUE; 1542 } 1543 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1544 k++; 1545 } 1546 } 1547 } 1548 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1549 n_vertices = total_counts; 1550 } 1551 1552 /* edges and faces */ 1553 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1554 if (i<n_ISForEdges) { 1555 used_IS = &ISForEdges[i]; 1556 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1557 } else { 1558 used_IS = &ISForFaces[i-n_ISForEdges]; 1559 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1560 } 1561 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1562 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1563 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1564 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1565 /* change of basis should not be performed on local periodic nodes */ 1566 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1567 if (nnsp_has_cnst) { 1568 PetscScalar quad_value; 1569 temp_constraints++; 1570 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1571 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1572 for (j=0;j<size_of_constraint;j++) { 1573 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1574 } 1575 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1576 total_counts++; 1577 } 1578 for (k=0;k<nnsp_size;k++) { 1579 PetscReal real_value; 1580 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1581 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1582 for (j=0;j<size_of_constraint;j++) { 1583 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1584 } 1585 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1586 /* check if array is null on the connected component */ 1587 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1588 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1589 if (real_value > 0.0) { /* keep indices and values */ 1590 temp_constraints++; 1591 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1592 total_counts++; 1593 } 1594 } 1595 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1596 valid_constraints = temp_constraints; 1597 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1598 if (!pcbddc->use_nnsp_true && temp_constraints) { 1599 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1600 1601 #if defined(PETSC_MISSING_LAPACK_GESVD) 1602 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1603 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1604 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1605 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1606 from that computed using LAPACKgesvd 1607 -> This is due to a different computation of eigenvectors in LAPACKheev 1608 -> The quality of the POD-computed basis will be the same */ 1609 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1610 /* Store upper triangular part of correlation matrix */ 1611 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1612 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1613 for (j=0;j<temp_constraints;j++) { 1614 for (k=0;k<j+1;k++) { 1615 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1616 } 1617 } 1618 /* compute eigenvalues and eigenvectors of correlation matrix */ 1619 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1620 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1621 #if !defined(PETSC_USE_COMPLEX) 1622 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1623 #else 1624 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1625 #endif 1626 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1627 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1628 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1629 j=0; 1630 while (j < temp_constraints && singular_vals[j] < tol) j++; 1631 total_counts=total_counts-j; 1632 valid_constraints = temp_constraints-j; 1633 /* scale and copy POD basis into used quadrature memory */ 1634 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1635 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1636 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1637 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1638 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1639 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1640 if (j<temp_constraints) { 1641 PetscInt ii; 1642 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1643 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1644 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1645 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1646 for (k=0;k<temp_constraints-j;k++) { 1647 for (ii=0;ii<size_of_constraint;ii++) { 1648 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1649 } 1650 } 1651 } 1652 #else /* on missing GESVD */ 1653 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1654 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1655 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1656 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1657 #if !defined(PETSC_USE_COMPLEX) 1658 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1659 #else 1660 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1661 #endif 1662 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1663 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1664 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1665 k = temp_constraints; 1666 if (k > size_of_constraint) k = size_of_constraint; 1667 j = 0; 1668 while (j < k && singular_vals[k-j-1] < tol) j++; 1669 total_counts = total_counts-temp_constraints+k-j; 1670 valid_constraints = k-j; 1671 #endif /* on missing GESVD */ 1672 } 1673 /* setting change_of_basis flag is safe now */ 1674 if (boolforchange) { 1675 for (j=0;j<valid_constraints;j++) { 1676 PetscBTSet(change_basis,total_counts-j-1); 1677 } 1678 } 1679 } 1680 /* free index sets of faces, edges and vertices */ 1681 for (i=0;i<n_ISForFaces;i++) { 1682 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1683 } 1684 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1685 for (i=0;i<n_ISForEdges;i++) { 1686 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1687 } 1688 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1689 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1690 /* map temp_indices_to_constraint in boundary numbering */ 1691 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1692 if (i != temp_indices[total_counts]) { 1693 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1694 } 1695 1696 /* free workspace */ 1697 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1698 ierr = PetscFree(work);CHKERRQ(ierr); 1699 #if defined(PETSC_USE_COMPLEX) 1700 ierr = PetscFree(rwork);CHKERRQ(ierr); 1701 #endif 1702 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1703 #if defined(PETSC_MISSING_LAPACK_GESVD) 1704 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1705 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1706 #endif 1707 } 1708 for (k=0;k<nnsp_size;k++) { 1709 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1710 } 1711 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1712 1713 /* set quantities in pcbddc data structure and store previous primal size */ 1714 /* n_vertices defines the number of subdomain corners in the primal space */ 1715 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1716 olocal_primal_size = pcbddc->local_primal_size; 1717 pcbddc->local_primal_size = total_counts; 1718 pcbddc->n_vertices = n_vertices; 1719 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1720 1721 /* Create constraint matrix */ 1722 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1723 /* so we need to set it up properly either with or without change of basis */ 1724 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1725 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1726 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1727 /* array to compute a local numbering of constraints : vertices first then constraints */ 1728 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1729 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1730 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1731 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1732 /* auxiliary stuff for basis change */ 1733 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1734 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1735 1736 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1737 total_primal_vertices=0; 1738 for (i=0;i<pcbddc->local_primal_size;i++) { 1739 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1740 if (size_of_constraint == 1) { 1741 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1742 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1743 aux_primal_minloc[total_primal_vertices]=0; 1744 total_primal_vertices++; 1745 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1746 PetscInt min_loc,min_index; 1747 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1748 /* find first untouched local node */ 1749 k = 0; 1750 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1751 min_index = global_indices[k]; 1752 min_loc = k; 1753 /* search the minimum among global nodes already untouched on the cc */ 1754 for (k=1;k<size_of_constraint;k++) { 1755 /* there can be more than one constraint on a single connected component */ 1756 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1757 min_index = global_indices[k]; 1758 min_loc = k; 1759 } 1760 } 1761 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1762 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1763 aux_primal_minloc[total_primal_vertices]=min_loc; 1764 total_primal_vertices++; 1765 } 1766 } 1767 /* determine if a QR strategy is needed for change of basis */ 1768 qr_needed = PETSC_FALSE; 1769 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1770 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1771 if (PetscBTLookup(change_basis,i)) { 1772 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1773 j = 0; 1774 for (k=0;k<size_of_constraint;k++) { 1775 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1776 j++; 1777 } 1778 } 1779 /* found more than one primal dof on the cc */ 1780 if (j > 1) { 1781 PetscBTSet(qr_needed_idx,i); 1782 qr_needed = PETSC_TRUE; 1783 } 1784 } 1785 } 1786 /* free workspace */ 1787 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1788 1789 /* permute indices in order to have a sorted set of vertices */ 1790 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1791 1792 /* nonzero structure of constraint matrix */ 1793 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1794 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1795 j=total_primal_vertices; 1796 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1797 if (!PetscBTLookup(change_basis,i)) { 1798 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1799 j++; 1800 } 1801 } 1802 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1803 ierr = PetscFree(nnz);CHKERRQ(ierr); 1804 /* set values in constraint matrix */ 1805 for (i=0;i<total_primal_vertices;i++) { 1806 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1807 } 1808 total_counts = total_primal_vertices; 1809 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1810 if (!PetscBTLookup(change_basis,i)) { 1811 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1812 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1813 total_counts++; 1814 } 1815 } 1816 /* assembling */ 1817 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1818 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1819 /* 1820 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1821 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1822 */ 1823 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1824 if (pcbddc->use_change_of_basis) { 1825 /* dual and primal dofs on a single cc */ 1826 PetscInt dual_dofs,primal_dofs; 1827 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1828 PetscInt primal_counter; 1829 /* working stuff for GEQRF */ 1830 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1831 PetscBLASInt lqr_work; 1832 /* working stuff for UNGQR */ 1833 PetscScalar *gqr_work,lgqr_work_t; 1834 PetscBLASInt lgqr_work; 1835 /* working stuff for TRTRS */ 1836 PetscScalar *trs_rhs; 1837 PetscBLASInt Blas_NRHS; 1838 /* pointers for values insertion into change of basis matrix */ 1839 PetscInt *start_rows,*start_cols; 1840 PetscScalar *start_vals; 1841 /* working stuff for values insertion */ 1842 PetscBT is_primal; 1843 1844 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1845 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1846 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1847 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1848 /* work arrays */ 1849 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1850 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1851 /* nonzeros per row */ 1852 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1853 if (PetscBTLookup(change_basis,i)) { 1854 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1855 if (PetscBTLookup(qr_needed_idx,i)) { 1856 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1857 } else { 1858 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1859 /* get local primal index on the cc */ 1860 j = 0; 1861 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1862 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1863 } 1864 } 1865 } 1866 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1867 ierr = PetscFree(nnz);CHKERRQ(ierr); 1868 /* Set initial identity in the matrix */ 1869 for (i=0;i<pcis->n_B;i++) { 1870 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1871 } 1872 1873 if (pcbddc->dbg_flag) { 1874 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1875 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1876 } 1877 1878 1879 /* Now we loop on the constraints which need a change of basis */ 1880 /* 1881 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1882 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1883 1884 Basic blocks of change of basis matrix T computed 1885 1886 - Using the following block transformation if there is only a primal dof on the cc 1887 (in the example, primal dof is the last one of the edge in LOCAL ordering 1888 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1889 | 1 0 ... 0 1 | 1890 | 0 1 ... 0 1 | 1891 | ... | 1892 | 0 ... 1 1 | 1893 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1894 1895 - via QR decomposition of constraints otherwise 1896 */ 1897 if (qr_needed) { 1898 /* space to store Q */ 1899 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1900 /* first we issue queries for optimal work */ 1901 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1902 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1903 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1904 lqr_work = -1; 1905 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1906 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1907 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1908 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1909 lgqr_work = -1; 1910 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1911 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1912 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1913 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1914 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1915 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1916 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1917 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1918 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1919 /* array to store scaling factors for reflectors */ 1920 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1921 /* array to store rhs and solution of triangular solver */ 1922 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1923 /* allocating workspace for check */ 1924 if (pcbddc->dbg_flag) { 1925 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1926 } 1927 } 1928 /* array to store whether a node is primal or not */ 1929 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1930 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1931 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1932 if (i != total_primal_vertices) { 1933 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1934 } 1935 for (i=0;i<total_primal_vertices;i++) { 1936 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1937 } 1938 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1939 1940 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1941 /* -> using implicit ordering contained in temp_indices data */ 1942 total_counts = pcbddc->n_vertices; 1943 primal_counter = total_counts; 1944 while (total_counts<pcbddc->local_primal_size) { 1945 primal_dofs = 1; 1946 if (PetscBTLookup(change_basis,total_counts)) { 1947 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1948 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1949 primal_dofs++; 1950 } 1951 /* get constraint info */ 1952 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1953 dual_dofs = size_of_constraint-primal_dofs; 1954 1955 if (pcbddc->dbg_flag) { 1956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1957 } 1958 1959 if (primal_dofs > 1) { /* QR */ 1960 1961 /* copy quadrature constraints for change of basis check */ 1962 if (pcbddc->dbg_flag) { 1963 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1964 } 1965 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1966 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1967 1968 /* compute QR decomposition of constraints */ 1969 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1970 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1971 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1972 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1973 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1974 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1975 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1976 1977 /* explictly compute R^-T */ 1978 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1979 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1980 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1981 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1982 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1983 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1984 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1985 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1986 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1987 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1988 1989 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1990 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1991 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1992 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1993 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1994 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1995 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1996 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1997 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1998 1999 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2000 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2001 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2002 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2003 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2004 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2006 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2007 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2008 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2009 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2010 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2011 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2012 2013 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2014 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2015 /* insert cols for primal dofs */ 2016 for (j=0;j<primal_dofs;j++) { 2017 start_vals = &qr_basis[j*size_of_constraint]; 2018 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2019 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2020 } 2021 /* insert cols for dual dofs */ 2022 for (j=0,k=0;j<dual_dofs;k++) { 2023 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2024 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2025 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2026 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2027 j++; 2028 } 2029 } 2030 2031 /* check change of basis */ 2032 if (pcbddc->dbg_flag) { 2033 PetscInt ii,jj; 2034 PetscBool valid_qr=PETSC_TRUE; 2035 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2036 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2037 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2039 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2040 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2041 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2042 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2043 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2044 for (jj=0;jj<size_of_constraint;jj++) { 2045 for (ii=0;ii<primal_dofs;ii++) { 2046 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2047 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2048 } 2049 } 2050 if (!valid_qr) { 2051 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2052 for (jj=0;jj<size_of_constraint;jj++) { 2053 for (ii=0;ii<primal_dofs;ii++) { 2054 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2055 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2056 } 2057 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2058 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2059 } 2060 } 2061 } 2062 } else { 2063 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2064 } 2065 } 2066 } else { /* simple transformation block */ 2067 PetscInt row,col; 2068 PetscScalar val; 2069 for (j=0;j<size_of_constraint;j++) { 2070 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2071 if (!PetscBTLookup(is_primal,row)) { 2072 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2073 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2074 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2075 } else { 2076 for (k=0;k<size_of_constraint;k++) { 2077 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2078 if (row != col) { 2079 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2080 } else { 2081 val = 1.0; 2082 } 2083 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2084 } 2085 } 2086 } 2087 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2088 } 2089 /* increment primal counter */ 2090 primal_counter += primal_dofs; 2091 } else { 2092 if (pcbddc->dbg_flag) { 2093 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2094 } 2095 } 2096 /* increment constraint counter total_counts */ 2097 total_counts += primal_dofs; 2098 } 2099 2100 /* free workspace */ 2101 if (qr_needed) { 2102 if (pcbddc->dbg_flag) { 2103 ierr = PetscFree(work);CHKERRQ(ierr); 2104 } 2105 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2106 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2107 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2108 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2109 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2110 } 2111 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2112 /* assembling */ 2113 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2114 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2115 /* 2116 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2117 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2118 */ 2119 } 2120 2121 /* get indices in local ordering for vertices and constraints */ 2122 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2123 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2124 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2125 } 2126 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2127 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2128 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2129 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2130 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2131 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2132 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2133 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2134 /* set quantities in PCBDDC data struct */ 2135 pcbddc->n_actual_vertices = i; 2136 /* check if a new primal space has been introduced */ 2137 pcbddc->new_primal_space_local = PETSC_TRUE; 2138 if (olocal_primal_size == pcbddc->local_primal_size) { 2139 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2140 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2141 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2142 } 2143 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2144 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2145 2146 /* flush dbg viewer */ 2147 if (pcbddc->dbg_flag) { 2148 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2149 } 2150 2151 /* free workspace */ 2152 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2153 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2154 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2155 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2156 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2157 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2158 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2159 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2160 PetscFunctionReturn(0); 2161 } 2162 2163 #undef __FUNCT__ 2164 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2165 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2166 { 2167 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2168 PC_IS *pcis = (PC_IS*)pc->data; 2169 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2170 PetscInt ierr,i,vertex_size; 2171 PetscViewer viewer=pcbddc->dbg_viewer; 2172 2173 PetscFunctionBegin; 2174 /* Reset previously computed graph */ 2175 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2176 /* Init local Graph struct */ 2177 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2178 2179 /* Check validity of the csr graph passed in by the user */ 2180 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2181 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2182 } 2183 2184 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2185 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2186 Mat mat_adj; 2187 const PetscInt *xadj,*adjncy; 2188 PetscBool flg_row=PETSC_TRUE; 2189 2190 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2191 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2192 if (!flg_row) { 2193 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2194 } 2195 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2196 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2197 if (!flg_row) { 2198 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2199 } 2200 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2201 } 2202 2203 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2204 vertex_size = 1; 2205 if (pcbddc->user_provided_isfordofs) { 2206 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2207 ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2208 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2209 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2210 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2211 } 2212 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2213 pcbddc->n_ISForDofs = 0; 2214 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2215 } 2216 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2217 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2218 } else { 2219 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2220 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2221 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2222 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2223 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2224 } 2225 } 2226 } 2227 2228 /* Setup of Graph */ 2229 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2230 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2231 } 2232 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2233 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2234 } 2235 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2236 2237 /* Graph's connected components analysis */ 2238 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2239 2240 /* print some info to stdout */ 2241 if (pcbddc->dbg_flag) { 2242 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2243 } 2244 2245 /* mark topography has done */ 2246 pcbddc->recompute_topography = PETSC_FALSE; 2247 PetscFunctionReturn(0); 2248 } 2249 2250 #undef __FUNCT__ 2251 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2252 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2253 { 2254 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2255 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2256 PetscErrorCode ierr; 2257 2258 PetscFunctionBegin; 2259 n = 0; 2260 vertices = 0; 2261 if (pcbddc->ConstraintMatrix) { 2262 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2263 for (i=0;i<local_primal_size;i++) { 2264 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2265 if (size_of_constraint == 1) n++; 2266 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2267 } 2268 if (vertices_idx) { 2269 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2270 n = 0; 2271 for (i=0;i<local_primal_size;i++) { 2272 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2273 if (size_of_constraint == 1) { 2274 vertices[n++]=row_cmat_indices[0]; 2275 } 2276 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2277 } 2278 } 2279 } 2280 *n_vertices = n; 2281 if (vertices_idx) *vertices_idx = vertices; 2282 PetscFunctionReturn(0); 2283 } 2284 2285 #undef __FUNCT__ 2286 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2287 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2288 { 2289 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2290 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2291 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2292 PetscBT touched; 2293 PetscErrorCode ierr; 2294 2295 /* This function assumes that the number of local constraints per connected component 2296 is not greater than the number of nodes defined for the connected component 2297 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2298 PetscFunctionBegin; 2299 n = 0; 2300 constraints_index = 0; 2301 if (pcbddc->ConstraintMatrix) { 2302 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2303 max_size_of_constraint = 0; 2304 for (i=0;i<local_primal_size;i++) { 2305 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2306 if (size_of_constraint > 1) { 2307 n++; 2308 } 2309 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2310 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2311 } 2312 if (constraints_idx) { 2313 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2314 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2315 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2316 n = 0; 2317 for (i=0;i<local_primal_size;i++) { 2318 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2319 if (size_of_constraint > 1) { 2320 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2321 /* find first untouched local node */ 2322 j = 0; 2323 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2324 min_index = row_cmat_global_indices[j]; 2325 min_loc = j; 2326 /* search the minimum among nodes not yet touched on the connected component 2327 since there can be more than one constraint on a single cc */ 2328 for (j=1;j<size_of_constraint;j++) { 2329 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2330 min_index = row_cmat_global_indices[j]; 2331 min_loc = j; 2332 } 2333 } 2334 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2335 constraints_index[n++] = row_cmat_indices[min_loc]; 2336 } 2337 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2338 } 2339 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2340 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2341 } 2342 } 2343 *n_constraints = n; 2344 if (constraints_idx) *constraints_idx = constraints_index; 2345 PetscFunctionReturn(0); 2346 } 2347 2348 /* the next two functions has been adapted from pcis.c */ 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "PCBDDCApplySchur" 2351 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2352 { 2353 PetscErrorCode ierr; 2354 PC_IS *pcis = (PC_IS*)(pc->data); 2355 2356 PetscFunctionBegin; 2357 if (!vec2_B) { vec2_B = v; } 2358 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2359 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2360 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2361 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2362 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2363 PetscFunctionReturn(0); 2364 } 2365 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2368 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2369 { 2370 PetscErrorCode ierr; 2371 PC_IS *pcis = (PC_IS*)(pc->data); 2372 2373 PetscFunctionBegin; 2374 if (!vec2_B) { vec2_B = v; } 2375 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2376 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2377 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2378 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2379 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "PCBDDCSubsetNumbering" 2385 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2386 { 2387 Vec local_vec,global_vec; 2388 IS seqis,paris; 2389 VecScatter scatter_ctx; 2390 PetscScalar *array; 2391 PetscInt *temp_global_dofs; 2392 PetscScalar globalsum; 2393 PetscInt i,j,s; 2394 PetscInt nlocals,first_index,old_index,max_local; 2395 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2396 PetscMPIInt *dof_sizes,*dof_displs; 2397 PetscBool first_found; 2398 PetscErrorCode ierr; 2399 2400 PetscFunctionBegin; 2401 /* mpi buffers */ 2402 MPI_Comm_size(comm,&size_prec_comm); 2403 MPI_Comm_rank(comm,&rank_prec_comm); 2404 j = ( !rank_prec_comm ? size_prec_comm : 0); 2405 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2406 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2407 /* get maximum size of subset */ 2408 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2409 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2410 max_local = 0; 2411 if (n_local_dofs) { 2412 max_local = temp_global_dofs[0]; 2413 for (i=1;i<n_local_dofs;i++) { 2414 if (max_local < temp_global_dofs[i] ) { 2415 max_local = temp_global_dofs[i]; 2416 } 2417 } 2418 } 2419 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2420 max_global++; 2421 max_local = 0; 2422 if (n_local_dofs) { 2423 max_local = local_dofs[0]; 2424 for (i=1;i<n_local_dofs;i++) { 2425 if (max_local < local_dofs[i] ) { 2426 max_local = local_dofs[i]; 2427 } 2428 } 2429 } 2430 max_local++; 2431 /* allocate workspace */ 2432 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2433 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2434 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2435 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2436 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2437 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2438 /* create scatter */ 2439 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2440 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2441 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2442 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2443 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2444 /* init array */ 2445 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2446 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2447 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2448 if (local_dofs_mult) { 2449 for (i=0;i<n_local_dofs;i++) { 2450 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2451 } 2452 } else { 2453 for (i=0;i<n_local_dofs;i++) { 2454 array[local_dofs[i]]=1.0; 2455 } 2456 } 2457 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2458 /* scatter into global vec and get total number of global dofs */ 2459 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2460 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2461 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2462 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2463 /* Fill global_vec with cumulative function for global numbering */ 2464 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2465 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2466 nlocals = 0; 2467 first_index = -1; 2468 first_found = PETSC_FALSE; 2469 for (i=0;i<s;i++) { 2470 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2471 first_found = PETSC_TRUE; 2472 first_index = i; 2473 } 2474 nlocals += (PetscInt)PetscRealPart(array[i]); 2475 } 2476 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2477 if (!rank_prec_comm) { 2478 dof_displs[0]=0; 2479 for (i=1;i<size_prec_comm;i++) { 2480 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2481 } 2482 } 2483 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2484 if (first_found) { 2485 array[first_index] += (PetscScalar)nlocals; 2486 old_index = first_index; 2487 for (i=first_index+1;i<s;i++) { 2488 if (PetscRealPart(array[i]) > 0.0) { 2489 array[i] += array[old_index]; 2490 old_index = i; 2491 } 2492 } 2493 } 2494 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2495 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2496 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2497 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2498 /* get global ordering of local dofs */ 2499 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2500 if (local_dofs_mult) { 2501 for (i=0;i<n_local_dofs;i++) { 2502 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2503 } 2504 } else { 2505 for (i=0;i<n_local_dofs;i++) { 2506 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2507 } 2508 } 2509 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2510 /* free workspace */ 2511 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2512 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2513 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2514 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2515 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2516 /* return pointer to global ordering of local dofs */ 2517 *global_numbering_subset = temp_global_dofs; 2518 PetscFunctionReturn(0); 2519 } 2520 2521 #undef __FUNCT__ 2522 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2523 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2524 { 2525 PetscInt i,j; 2526 PetscScalar *alphas; 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 /* this implements stabilized Gram-Schmidt */ 2531 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2532 for (i=0;i<n;i++) { 2533 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2534 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2535 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2536 } 2537 ierr = PetscFree(alphas);CHKERRQ(ierr); 2538 PetscFunctionReturn(0); 2539 } 2540 2541 /* TODO 2542 - now preallocation is done assuming SEQDENSE local matrices 2543 */ 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatISGetMPIXAIJ" 2546 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2547 { 2548 Mat new_mat; 2549 Mat_IS *matis = (Mat_IS*)(mat->data); 2550 /* info on mat */ 2551 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2552 PetscInt bs,rows,cols; 2553 PetscInt lrows,lcols; 2554 PetscInt local_rows,local_cols; 2555 PetscBool isdense; 2556 /* values insertion */ 2557 PetscScalar *array; 2558 PetscInt *local_indices,*global_indices; 2559 /* work */ 2560 PetscInt i,j,index_row; 2561 PetscErrorCode ierr; 2562 2563 PetscFunctionBegin; 2564 /* MISSING CHECKS 2565 - rectangular case not covered (it is not allowed by MATIS) 2566 */ 2567 /* get info from mat */ 2568 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2569 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2570 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2571 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2572 2573 /* work */ 2574 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2575 for (i=0;i<local_rows;i++) local_indices[i]=i; 2576 /* map indices of local mat to global values */ 2577 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2578 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2579 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2580 2581 if (reuse==MAT_INITIAL_MATRIX) { 2582 Vec vec_dnz,vec_onz; 2583 PetscScalar *my_dnz,*my_onz; 2584 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2585 PetscInt index_col,owner; 2586 PetscMPIInt nsubdomains; 2587 2588 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2589 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2590 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2591 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2592 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2593 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2594 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2595 2596 /* 2597 preallocation 2598 */ 2599 2600 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2601 /* 2602 Some vectors are needed to sum up properly on shared interface dofs. 2603 Preallocation macros cannot do the job. 2604 Note that preallocation is not exact, since it overestimates nonzeros 2605 */ 2606 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2607 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2608 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2609 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2610 /* All processes need to compute entire row ownership */ 2611 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2612 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2613 for (i=0;i<nsubdomains;i++) { 2614 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2615 row_ownership[j]=i; 2616 } 2617 } 2618 2619 /* 2620 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2621 then, they will be summed up properly. This way, preallocation is always sufficient 2622 */ 2623 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2624 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2625 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2626 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2627 for (i=0;i<local_rows;i++) { 2628 index_row = global_indices[i]; 2629 for (j=i;j<local_rows;j++) { 2630 owner = row_ownership[index_row]; 2631 index_col = global_indices[j]; 2632 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2633 my_dnz[i] += 1.0; 2634 } else { /* offdiag block */ 2635 my_onz[i] += 1.0; 2636 } 2637 /* same as before, interchanging rows and cols */ 2638 if (i != j) { 2639 owner = row_ownership[index_col]; 2640 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2641 my_dnz[j] += 1.0; 2642 } else { 2643 my_onz[j] += 1.0; 2644 } 2645 } 2646 } 2647 } 2648 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2649 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2650 if (local_rows) { /* multilevel guard */ 2651 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2652 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2653 } 2654 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2655 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2656 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2657 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2658 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2659 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2660 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2661 2662 /* set computed preallocation in dnz and onz */ 2663 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2664 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2665 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2666 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2667 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2668 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2669 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2670 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2671 2672 /* Resize preallocation if overestimated */ 2673 for (i=0;i<lrows;i++) { 2674 dnz[i] = PetscMin(dnz[i],lcols); 2675 onz[i] = PetscMin(onz[i],cols-lcols); 2676 } 2677 /* set preallocation */ 2678 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2679 for (i=0;i<lrows/bs;i++) { 2680 dnz[i] = dnz[i*bs]/bs; 2681 onz[i] = onz[i*bs]/bs; 2682 } 2683 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2684 for (i=0;i<lrows/bs;i++) { 2685 dnz[i] = dnz[i]-i; 2686 } 2687 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2688 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2689 *M = new_mat; 2690 } else { 2691 PetscInt mbs,mrows,mcols; 2692 /* some checks */ 2693 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2694 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2695 if (mrows != rows) { 2696 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2697 } 2698 if (mrows != rows) { 2699 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2700 } 2701 if (mbs != bs) { 2702 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2703 } 2704 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2705 } 2706 /* set local to global mappings */ 2707 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2708 /* Set values */ 2709 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2710 if (isdense) { /* special case for dense local matrices */ 2711 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2712 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2713 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2714 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2715 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2716 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2717 } else { /* very basic values insertion for all other matrix types */ 2718 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2719 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2720 for (i=0;i<local_rows;i++) { 2721 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2722 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2723 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2724 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2725 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2726 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2727 } 2728 } 2729 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2730 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2731 if (isdense) { 2732 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2733 } 2734 PetscFunctionReturn(0); 2735 } 2736 2737 #undef __FUNCT__ 2738 #define __FUNCT__ "MatISGetSubassemblingPattern" 2739 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2740 { 2741 Mat subdomain_adj; 2742 IS new_ranks,ranks_send_to; 2743 MatPartitioning partitioner; 2744 Mat_IS *matis; 2745 PetscInt n_neighs,*neighs,*n_shared,**shared; 2746 PetscInt prank; 2747 PetscMPIInt size,rank,color; 2748 PetscInt *xadj,*adjncy,*oldranks; 2749 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2750 PetscInt i,j,n_subdomains,local_size,threshold=0; 2751 PetscErrorCode ierr; 2752 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2753 PetscSubcomm subcomm; 2754 2755 PetscFunctionBegin; 2756 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2757 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2758 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2759 2760 /* Get info on mapping */ 2761 matis = (Mat_IS*)(mat->data); 2762 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2763 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2764 2765 /* build local CSR graph of subdomains' connectivity */ 2766 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2767 xadj[0] = 0; 2768 xadj[1] = PetscMax(n_neighs-1,0); 2769 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2770 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2771 2772 if (threshold) { 2773 PetscInt* count,min_threshold; 2774 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2775 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2776 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2777 for (j=0;j<n_shared[i];j++) { 2778 count[shared[i][j]] += 1; 2779 } 2780 } 2781 /* adapt threshold since we dont want to lose significant connections */ 2782 min_threshold = n_neighs; 2783 for (i=1;i<n_neighs;i++) { 2784 for (j=0;j<n_shared[i];j++) { 2785 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2786 } 2787 } 2788 threshold = PetscMax(min_threshold+1,threshold); 2789 xadj[1] = 0; 2790 for (i=1;i<n_neighs;i++) { 2791 for (j=0;j<n_shared[i];j++) { 2792 if (count[shared[i][j]] < threshold) { 2793 adjncy[xadj[1]] = neighs[i]; 2794 adjncy_wgt[xadj[1]] = n_shared[i]; 2795 xadj[1]++; 2796 break; 2797 } 2798 } 2799 } 2800 ierr = PetscFree(count);CHKERRQ(ierr); 2801 } else { 2802 if (xadj[1]) { 2803 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2804 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2805 } 2806 } 2807 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2808 if (use_square) { 2809 for (i=0;i<xadj[1];i++) { 2810 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2811 } 2812 } 2813 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2814 2815 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2816 2817 /* 2818 Restrict work on active processes only. 2819 */ 2820 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2821 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2822 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2823 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2824 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2825 if (color) { 2826 ierr = PetscFree(xadj);CHKERRQ(ierr); 2827 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2828 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2829 } else { 2830 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2831 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2832 prank = rank; 2833 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2834 /* 2835 for (i=0;i<size;i++) { 2836 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2837 } 2838 */ 2839 for (i=0;i<xadj[1];i++) { 2840 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2841 } 2842 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2843 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2844 n_subdomains = (PetscInt)size; 2845 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2846 2847 /* Partition */ 2848 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2849 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2850 if (use_vwgt) { 2851 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2852 v_wgt[0] = local_size; 2853 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2854 } 2855 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); */ 2856 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2857 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2858 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2859 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2860 2861 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2862 /* ranks_send_to_idx[0] = oldranks[is_indices[0]]; */ /* contiguos set of processes */ 2863 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2864 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2865 /* clean up */ 2866 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2867 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2868 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2869 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2870 } 2871 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2872 2873 /* assemble parallel IS for sends */ 2874 i = 1; 2875 if (color) i=0; 2876 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2877 2878 /* get back IS */ 2879 *is_sends = ranks_send_to; 2880 PetscFunctionReturn(0); 2881 } 2882 2883 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2884 2885 #undef __FUNCT__ 2886 #define __FUNCT__ "MatISSubassemble" 2887 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, MatReuse reuse, Mat *mat_n) 2888 { 2889 Mat local_mat; 2890 Mat_IS *matis; 2891 IS is_sends_internal; 2892 PetscInt rows,cols; 2893 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2894 PetscBool ismatis,isdense; 2895 ISLocalToGlobalMapping l2gmap; 2896 PetscInt* l2gmap_indices; 2897 const PetscInt* is_indices; 2898 MatType new_local_type; 2899 MatTypePrivate new_local_type_private; 2900 /* buffers */ 2901 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2902 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2903 /* MPI */ 2904 MPI_Comm comm; 2905 PetscMPIInt n_sends,n_recvs,commsize; 2906 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2907 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2908 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2909 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2910 PetscErrorCode ierr; 2911 2912 PetscFunctionBegin; 2913 /* checks */ 2914 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2915 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2916 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2917 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2918 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2919 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2920 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2921 if (reuse == MAT_REUSE_MATRIX) { 2922 PetscInt mrows,mcols,mnrows,mncols; 2923 PetscBool ismatis; 2924 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2925 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2926 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2927 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2928 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2929 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2930 } 2931 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2932 PetscValidLogicalCollectiveInt(mat,bs,0); 2933 /* prepare IS for sending if not provided */ 2934 if (!is_sends) { 2935 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2936 ierr = MatISGetSubassemblingPattern(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2937 } else { 2938 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2939 is_sends_internal = is_sends; 2940 } 2941 2942 /* get pointer of MATIS data */ 2943 matis = (Mat_IS*)mat->data; 2944 2945 /* get comm */ 2946 comm = PetscObjectComm((PetscObject)mat); 2947 2948 /* compute number of sends */ 2949 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2950 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2951 2952 /* compute number of receives */ 2953 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2954 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2955 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2956 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2957 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2958 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2959 ierr = PetscFree(iflags);CHKERRQ(ierr); 2960 2961 /* prepare send/receive buffers */ 2962 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2963 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2964 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2965 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2966 2967 /* Get data from local mat */ 2968 if (!isdense) { 2969 /* TODO: See below some guidelines on how to prepare the local buffers */ 2970 /* 2971 send_buffer_vals should contain the raw values of the local matrix 2972 send_buffer_idxs should contain: 2973 - MatType_PRIVATE type 2974 - PetscInt size_of_l2gmap 2975 - PetscInt global_row_indices[size_of_l2gmap] 2976 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2977 */ 2978 } else { 2979 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2980 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2981 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2982 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2983 send_buffer_idxs[1] = i; 2984 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2985 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2986 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2987 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2988 for (i=0;i<n_sends;i++) { 2989 ilengths_vals[is_indices[i]] = len*len; 2990 ilengths_idxs[is_indices[i]] = len+2; 2991 } 2992 } 2993 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2994 buf_size_idxs = 0; 2995 buf_size_vals = 0; 2996 for (i=0;i<n_recvs;i++) { 2997 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2998 buf_size_vals += (PetscInt)olengths_vals[i]; 2999 } 3000 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 3001 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 3002 3003 /* get new tags for clean communications */ 3004 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3005 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3006 3007 /* allocate for requests */ 3008 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 3009 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 3010 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 3011 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 3012 3013 /* communications */ 3014 ptr_idxs = recv_buffer_idxs; 3015 ptr_vals = recv_buffer_vals; 3016 for (i=0;i<n_recvs;i++) { 3017 source_dest = onodes[i]; 3018 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3019 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3020 ptr_idxs += olengths_idxs[i]; 3021 ptr_vals += olengths_vals[i]; 3022 } 3023 for (i=0;i<n_sends;i++) { 3024 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3025 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3026 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3027 } 3028 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3029 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3030 3031 /* assemble new l2g map */ 3032 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3033 ptr_idxs = recv_buffer_idxs; 3034 buf_size_idxs = 0; 3035 for (i=0;i<n_recvs;i++) { 3036 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3037 ptr_idxs += olengths_idxs[i]; 3038 } 3039 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 3040 ptr_idxs = recv_buffer_idxs; 3041 buf_size_idxs = 0; 3042 for (i=0;i<n_recvs;i++) { 3043 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3044 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3045 ptr_idxs += olengths_idxs[i]; 3046 } 3047 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3048 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3049 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3050 3051 /* infer new local matrix type from received local matrices type */ 3052 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3053 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3054 new_local_type_private = MATAIJ_PRIVATE; 3055 new_local_type = MATSEQAIJ; 3056 if (n_recvs) { 3057 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3058 ptr_idxs = recv_buffer_idxs; 3059 for (i=0;i<n_recvs;i++) { 3060 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3061 new_local_type_private = MATAIJ_PRIVATE; 3062 break; 3063 } 3064 ptr_idxs += olengths_idxs[i]; 3065 } 3066 switch (new_local_type_private) { 3067 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 3068 new_local_type = MATSEQAIJ; 3069 bs = 1; 3070 break; 3071 case MATAIJ_PRIVATE: 3072 new_local_type = MATSEQAIJ; 3073 bs = 1; 3074 break; 3075 case MATBAIJ_PRIVATE: 3076 new_local_type = MATSEQBAIJ; 3077 break; 3078 case MATSBAIJ_PRIVATE: 3079 new_local_type = MATSEQSBAIJ; 3080 break; 3081 default: 3082 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3083 break; 3084 } 3085 } 3086 3087 /* create MATIS object if needed */ 3088 if (reuse == MAT_INITIAL_MATRIX) { 3089 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3090 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3091 } else { 3092 /* it also destroys the local matrices */ 3093 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3094 } 3095 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3096 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3097 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3098 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3099 3100 /* set values */ 3101 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3102 ptr_vals = recv_buffer_vals; 3103 ptr_idxs = recv_buffer_idxs; 3104 for (i=0;i<n_recvs;i++) { 3105 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3106 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3107 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3108 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3109 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3110 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3111 } 3112 ptr_idxs += olengths_idxs[i]; 3113 ptr_vals += olengths_vals[i]; 3114 } 3115 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3116 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3117 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3118 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3119 3120 { /* check */ 3121 Vec lvec,rvec; 3122 PetscReal infty_error; 3123 3124 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3125 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3126 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3127 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3128 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3129 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3130 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3131 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3132 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3133 } 3134 3135 /* free workspace */ 3136 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3137 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3138 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3139 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3140 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3141 if (isdense) { 3142 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3143 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3144 } else { 3145 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3146 } 3147 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3148 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3149 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3150 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3151 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3152 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3153 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3154 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3155 ierr = PetscFree(onodes);CHKERRQ(ierr); 3156 PetscFunctionReturn(0); 3157 } 3158 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3161 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3162 { 3163 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3164 PC_IS *pcis = (PC_IS*)pc->data; 3165 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3166 MatNullSpace CoarseNullSpace=NULL; 3167 ISLocalToGlobalMapping coarse_islg; 3168 IS coarse_is; 3169 PetscInt max_it; 3170 PetscInt im_active=-1,active_procs=-1; 3171 PC pc_temp; 3172 PCType coarse_pc_type; 3173 KSPType coarse_ksp_type; 3174 PetscBool multilevel_requested,multilevel_allowed; 3175 PetscBool setsym,issym,isherm,ispreonly,isbddc,isnn,coarse_reuse; 3176 MatStructure matstruct; 3177 PetscErrorCode ierr; 3178 3179 PetscFunctionBegin; 3180 /* Assign global numbering to coarse dofs */ 3181 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3182 PetscInt ocoarse_size; 3183 ocoarse_size = pcbddc->coarse_size; 3184 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3185 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3186 /* see if we can avoid some work */ 3187 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3188 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3189 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3190 coarse_reuse = PETSC_FALSE; 3191 } else { /* we can safely reuse already computed coarse matrix */ 3192 coarse_reuse = PETSC_TRUE; 3193 } 3194 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3195 coarse_reuse = PETSC_FALSE; 3196 } 3197 /* reset any subassembling information */ 3198 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3199 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3200 coarse_reuse = PETSC_TRUE; 3201 } 3202 3203 /* test if we can go multilevel */ 3204 multilevel_allowed = PETSC_FALSE; 3205 multilevel_requested = PETSC_FALSE; 3206 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3207 if (multilevel_requested) { 3208 /* count "active processes" */ 3209 im_active = !!(pcis->n); 3210 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3211 if (active_procs/pcbddc->coarsening_ratio < 2) { 3212 multilevel_allowed = PETSC_FALSE; 3213 } else { 3214 multilevel_allowed = PETSC_TRUE; 3215 } 3216 } 3217 3218 /* creates temporary l2gmap and IS for coarse indexes */ 3219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3220 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3221 3222 /* creates temporary MATIS object for coarse matrix */ 3223 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3224 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3225 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3226 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3227 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3228 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3229 3230 /* set defaults for coarse KSP and PC */ 3231 if (multilevel_allowed) { 3232 coarse_ksp_type = KSPRICHARDSON; 3233 coarse_pc_type = PCBDDC; 3234 } else { 3235 coarse_ksp_type = KSPPREONLY; 3236 coarse_pc_type = PCREDUNDANT; 3237 } 3238 3239 /* create the coarse KSP object only once with defaults */ 3240 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3241 if (!pcbddc->coarse_ksp) { 3242 char prefix[256],str_level[3]; 3243 size_t len; 3244 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3245 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3246 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3247 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is,matstruct);CHKERRQ(ierr); 3248 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3249 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3250 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3251 /* prefix */ 3252 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3253 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3254 if (!pcbddc->current_level) { 3255 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3256 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3257 } else { 3258 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3259 if (pcbddc->current_level>1) len -= 2; 3260 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3261 *(prefix+len)='\0'; 3262 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3263 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3264 } 3265 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3266 } 3267 /* allow user customization */ 3268 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3269 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3270 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3271 3272 /* get some info after set from options */ 3273 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3274 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3275 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3276 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3277 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3278 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3279 isbddc = PETSC_FALSE; 3280 } 3281 3282 /* propagate BDDC info to the next level */ 3283 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3284 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3285 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3286 if (isbddc) { /* protects from unneded computations */ 3287 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3288 const PetscInt *idxs; 3289 IS *c_isfordofs,c_isneumann; 3290 ISLocalToGlobalMapping tmap; 3291 3292 /* create map between primal indices (in local representative ordering) and local subdomain numbering */ 3293 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3294 /* allocate space for temporary storage */ 3295 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3296 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3297 /* dofs splitting */ 3298 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&c_isfordofs);CHKERRQ(ierr); 3299 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3300 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3301 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3302 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3303 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3304 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3305 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3306 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&c_isfordofs[i]);CHKERRQ(ierr); 3307 /* ierr = ISView(c_isfordofs[i],0);CHKERRQ(ierr); */ 3308 } 3309 ierr = PCBDDCSetDofsSplitting(pc_temp,pcbddc->n_ISForDofsLocal,c_isfordofs);CHKERRQ(ierr); 3310 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3311 ierr = ISDestroy(&c_isfordofs[i]);CHKERRQ(ierr); 3312 } 3313 ierr = PetscFree(c_isfordofs);CHKERRQ(ierr); 3314 /* neumann boundaries */ 3315 if (pcbddc->NeumannBoundariesLocal) { 3316 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3317 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3318 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3319 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3320 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3321 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3322 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&c_isneumann);CHKERRQ(ierr); 3323 /* ierr = ISView(c_isneumann,0);CHKERRQ(ierr); */ 3324 ierr = PCBDDCSetNeumannBoundaries(pc_temp,c_isneumann);CHKERRQ(ierr); 3325 } 3326 ierr = ISDestroy(&c_isneumann);CHKERRQ(ierr); 3327 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3328 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3329 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3330 } 3331 3332 /* destroy no longer needed map */ 3333 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3334 3335 /* assemble coarse matrix */ 3336 if (isbddc || isnn) { 3337 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3338 ierr = MatISGetSubassemblingPattern(coarse_mat_is,pcbddc->coarsening_ratio,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3339 if (pcbddc->dbg_flag) { 3340 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3341 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3342 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 3343 } 3344 } 3345 if (coarse_reuse) { 3346 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3347 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3348 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3349 } else { 3350 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3351 } 3352 } else { 3353 if (coarse_reuse) { 3354 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3355 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3356 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3357 } else { 3358 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3359 } 3360 } 3361 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3362 3363 /* create local to global scatters for coarse problem */ 3364 if (pcbddc->new_primal_space) { 3365 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3366 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3367 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3368 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3369 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3370 } 3371 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3372 3373 /* propagate symmetry info to coarse matrix */ 3374 issym = PETSC_FALSE; 3375 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3376 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3377 isherm = PETSC_FALSE; 3378 ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr); 3379 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3380 3381 /* Compute coarse null space (special handling by BDDC only) */ 3382 if (pcbddc->NullSpace) { 3383 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3384 if (isbddc) { 3385 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3386 } else { 3387 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3388 } 3389 } 3390 3391 /* set operators */ 3392 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3393 3394 /* additional KSP customization */ 3395 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3396 if (max_it < 5) { 3397 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3398 } 3399 3400 /* print some info if requested */ 3401 if (pcbddc->dbg_flag) { 3402 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3403 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3404 if (!multilevel_allowed) { 3405 if (multilevel_requested) { 3406 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3407 } else if (pcbddc->max_levels) { 3408 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3409 } 3410 } 3411 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3412 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3413 } 3414 3415 /* setup coarse ksp */ 3416 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3417 3418 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3419 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3420 if (pcbddc->dbg_flag || !ispreonly) { 3421 KSP check_ksp; 3422 KSPType check_ksp_type; 3423 PC check_pc; 3424 Vec check_vec; 3425 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3426 PetscInt its; 3427 PetscBool compute; 3428 3429 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3430 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3431 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3432 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3433 if (ispreonly) { 3434 check_ksp_type = KSPPREONLY; 3435 compute = PETSC_FALSE; 3436 } else { 3437 check_ksp_type = KSPGMRES; 3438 compute = PETSC_TRUE; 3439 } 3440 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3441 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3442 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3443 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3444 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3445 /* create random vec */ 3446 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3447 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3448 if (CoarseNullSpace) { 3449 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3450 } 3451 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3452 /* solve coarse problem */ 3453 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3454 if (CoarseNullSpace) { 3455 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3456 } 3457 /* set eigenvalue estimation if preonly has not been requested */ 3458 if (!ispreonly) { 3459 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3460 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Coarse problem eigenvalues: %1.6e %1.6e\n",lambda_min,lambda_max);CHKERRQ(ierr); */ 3461 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3462 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3463 } 3464 /* check coarse problem residual error */ 3465 if (pcbddc->dbg_flag) { 3466 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3467 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3468 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3469 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3470 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3471 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3472 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3473 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3474 if (!ispreonly) { 3475 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3476 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3477 } 3478 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3479 } 3480 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3481 } 3482 3483 /* print additional info */ 3484 if (pcbddc->dbg_flag) { 3485 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3487 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3488 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3489 } 3490 3491 /* free memory */ 3492 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3493 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3494 PetscFunctionReturn(0); 3495 } 3496 3497 #undef __FUNCT__ 3498 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3499 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3500 { 3501 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3502 PC_IS* pcis = (PC_IS*)pc->data; 3503 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3504 PetscInt i,coarse_size; 3505 PetscInt *local_primal_indices; 3506 PetscErrorCode ierr; 3507 3508 PetscFunctionBegin; 3509 /* Compute global number of coarse dofs */ 3510 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3511 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3512 } 3513 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3514 3515 /* check numbering */ 3516 if (pcbddc->dbg_flag) { 3517 PetscScalar coarsesum,*array; 3518 3519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3520 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3521 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3522 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3523 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3524 for (i=0;i<pcbddc->local_primal_size;i++) { 3525 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3526 } 3527 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3528 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3529 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3530 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3531 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3532 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3533 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3534 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3535 for (i=0;i<pcis->n;i++) { 3536 if (array[i] == 1.0) { 3537 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3538 } 3539 } 3540 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3541 for (i=0;i<pcis->n;i++) { 3542 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3543 } 3544 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3545 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3546 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3547 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3548 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3550 if (pcbddc->dbg_flag > 1) { 3551 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3552 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3553 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3554 for (i=0;i<pcbddc->local_primal_size;i++) { 3555 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3556 } 3557 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3558 } 3559 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3560 } 3561 /* get back data */ 3562 *coarse_size_n = coarse_size; 3563 *local_primal_indices_n = local_primal_indices; 3564 PetscFunctionReturn(0); 3565 } 3566 3567 #undef __FUNCT__ 3568 #define __FUNCT__ "PCBDDCGlobalToLocal" 3569 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3570 { 3571 PC_IS* pcis = (PC_IS*)pc->data; 3572 IS localis_t; 3573 PetscInt i,lsize,*idxs; 3574 PetscScalar *vals; 3575 PetscErrorCode ierr; 3576 3577 PetscFunctionBegin; 3578 /* get dirichlet indices in local ordering exploiting local to global map */ 3579 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3580 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3581 for (i=0;i<lsize;i++) vals[i] = 1.0; 3582 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3583 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3584 if (idxs) { /* multilevel guard */ 3585 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3586 } 3587 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3588 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3589 ierr = PetscFree(vals);CHKERRQ(ierr); 3590 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3591 /* now compute dirichlet set in local ordering */ 3592 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3593 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3594 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3595 for (i=0,lsize=0;i<pcis->n;i++) { 3596 if (vals[i] == 1.0) { 3597 lsize++; 3598 } 3599 } 3600 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3601 for (i=0,lsize=0;i<pcis->n;i++) { 3602 if (vals[i] == 1.0) { 3603 idxs[lsize++] = i; 3604 } 3605 } 3606 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3607 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3608 *localis = localis_t; 3609 PetscFunctionReturn(0); 3610 } 3611