1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCSetUpSolvers" 10 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscScalar *coarse_submat_vals; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local neumann solver ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCResetSolvers" 84 PetscErrorCode PCBDDCResetSolvers(PC pc) 85 { 86 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 87 PetscErrorCode ierr; 88 89 PetscFunctionBegin; 90 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 91 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 111 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 112 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 113 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 114 PetscFunctionReturn(0); 115 } 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 119 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 120 { 121 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 122 PC_IS *pcis = (PC_IS*)pc->data; 123 VecType impVecType; 124 PetscInt n_constraints,n_R,old_size; 125 PetscErrorCode ierr; 126 127 PetscFunctionBegin; 128 if (!pcbddc->ConstraintMatrix) { 129 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 130 } 131 /* get sizes */ 132 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 133 n_R = pcis->n-pcbddc->n_actual_vertices; 134 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 135 /* local work vectors (try to avoid unneeded work)*/ 136 /* R nodes */ 137 old_size = -1; 138 if (pcbddc->vec1_R) { 139 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 140 } 141 if (n_R != old_size) { 142 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 144 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 146 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 147 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 148 } 149 /* local primal dofs */ 150 old_size = -1; 151 if (pcbddc->vec1_P) { 152 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 153 } 154 if (pcbddc->local_primal_size != old_size) { 155 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 157 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 158 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 159 } 160 /* local explicit constraints */ 161 old_size = -1; 162 if (pcbddc->vec1_C) { 163 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 164 } 165 if (n_constraints && n_constraints != old_size) { 166 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 168 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 169 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "PCBDDCSetUpCorrection" 176 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 177 { 178 PetscErrorCode ierr; 179 /* pointers to pcis and pcbddc */ 180 PC_IS* pcis = (PC_IS*)pc->data; 181 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 182 /* submatrices of local problem */ 183 Mat A_RV,A_VR,A_VV; 184 /* working matrices */ 185 Mat M1,M2,M3,C_CR; 186 /* working vectors */ 187 Vec vec1_C,vec2_C,vec1_V,vec2_V; 188 /* additional working stuff */ 189 IS is_aux; 190 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 191 const PetscScalar *array,*row_cmat_values; 192 const PetscInt *row_cmat_indices,*idx_R_local; 193 PetscInt *idx_V_B,*auxindices; 194 PetscInt n_vertices,n_constraints,size_of_constraint; 195 PetscInt i,j,n_R,n_D,n_B; 196 PetscBool unsymmetric_check; 197 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 198 MatType impMatType; 199 /* some shortcuts to scalars */ 200 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 201 /* for debugging purposes */ 202 PetscReal *coarsefunctions_errors,*constraints_errors; 203 204 PetscFunctionBegin; 205 /* get number of vertices (corners plus constraints with change of basis) 206 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 207 n_vertices = pcbddc->n_actual_vertices; 208 n_constraints = pcbddc->local_primal_size-n_vertices; 209 /* Set Non-overlapping dimensions */ 210 n_B = pcis->n_B; n_D = pcis->n - n_B; 211 n_R = pcis->n-n_vertices; 212 213 /* Set types for local objects needed by BDDC precondtioner */ 214 impMatType = MATSEQDENSE; 215 216 /* Allocating some extra storage just to be safe */ 217 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 218 for (i=0;i<pcis->n;i++) auxindices[i]=i; 219 220 /* vertices in boundary numbering */ 221 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 222 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 223 if (i != n_vertices) { 224 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 225 } 226 227 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 228 if (n_constraints) { 229 /* see if we can save some allocations */ 230 if (pcbddc->local_auxmat2) { 231 PetscInt on_R,on_constraints; 232 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 233 if (on_R != n_R || on_constraints != n_constraints) { 234 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 235 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 236 } 237 } 238 /* work vectors */ 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 240 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 241 /* auxiliary matrices */ 242 if (!pcbddc->local_auxmat2) { 243 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 244 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 245 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 246 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 247 } 248 249 /* Extract constraints on R nodes: C_{CR} */ 250 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 251 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 252 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 253 254 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 255 for (i=0;i<n_constraints;i++) { 256 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 257 /* Get row of constraint matrix in R numbering */ 258 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 259 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 260 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 262 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 263 /* Solve for row of constraint matrix in R numbering */ 264 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 265 /* Set values in local_auxmat2 */ 266 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 267 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 268 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 } 270 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 272 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 273 274 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 275 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 276 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 277 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 278 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 279 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 280 ierr = MatSetUp(M1);CHKERRQ(ierr); 281 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 282 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 283 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 284 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 285 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 286 ierr = MatDestroy(&M2);CHKERRQ(ierr); 287 ierr = MatDestroy(&M3);CHKERRQ(ierr); 288 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 289 if (!pcbddc->local_auxmat1) { 290 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 291 } else { 292 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } 294 } 295 296 /* Get submatrices from subdomain matrix */ 297 if (n_vertices) { 298 PetscInt ibs,mbs; 299 PetscBool issbaij; 300 Mat newmat; 301 302 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 303 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 304 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 305 if (ibs != mbs) { /* need to convert to SEQAIJ */ 306 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 310 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 311 } else { 312 /* this is safe */ 313 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 314 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 315 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 316 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 317 /* which of the two approaches is faster? */ 318 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 319 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 320 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 322 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 323 } else { 324 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 325 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 326 } 327 } 328 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 329 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 330 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 331 } 332 333 /* Matrix of coarse basis functions (local) */ 334 if (pcbddc->coarse_phi_B) { 335 PetscInt on_B,on_primal; 336 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 337 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 338 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 339 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 340 } 341 } 342 if (pcbddc->coarse_phi_D) { 343 PetscInt on_D,on_primal; 344 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 345 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 346 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 347 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 348 } 349 } 350 if (!pcbddc->coarse_phi_B) { 351 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 352 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 353 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 354 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 355 } 356 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 357 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 358 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 359 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 360 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 361 } 362 363 if (pcbddc->dbg_flag) { 364 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 365 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 366 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 367 } 368 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 369 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 370 371 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 372 373 /* vertices */ 374 for (i=0;i<n_vertices;i++) { 375 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 376 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 377 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 378 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 379 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 380 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 381 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 382 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 384 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 385 if (n_constraints) { 386 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 387 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 388 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 389 } 390 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 391 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 392 393 /* Set values in coarse basis function and subdomain part of coarse_mat */ 394 /* coarse basis functions */ 395 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 396 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 398 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 400 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 402 if (pcbddc->switch_static || pcbddc->dbg_flag) { 403 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 405 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 406 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 407 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 } 409 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 410 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 if (n_constraints) { 414 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 } 418 419 /* check */ 420 if (pcbddc->dbg_flag) { 421 /* assemble subdomain vector on local nodes */ 422 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 423 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 if (n_R) { 425 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 426 } 427 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 428 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 430 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 431 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 432 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 433 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 if (n_constraints) { 437 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 438 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 } 441 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 442 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 443 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 444 /* check saddle point solution */ 445 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 446 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 447 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 448 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 449 /* shift by the identity matrix */ 450 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 451 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 453 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 454 } 455 } 456 457 /* constraints */ 458 for (i=0;i<n_constraints;i++) { 459 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 460 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 462 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 463 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 464 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 465 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 466 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 467 if (n_vertices) { 468 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 469 } 470 /* Set values in coarse basis function and subdomain part of coarse_mat */ 471 /* coarse basis functions */ 472 j = i+n_vertices; /* don't touch this! */ 473 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 474 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 475 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 477 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 478 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 481 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 482 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 483 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 484 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 485 } 486 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 487 if (n_vertices) { 488 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 491 } 492 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 493 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 494 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 495 496 if (pcbddc->dbg_flag) { 497 /* assemble subdomain vector on nodes */ 498 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 499 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 500 if (n_R) { 501 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 502 } 503 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 504 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 505 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 506 /* assemble subdomain vector of lagrange multipliers */ 507 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 508 if (n_vertices) { 509 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 510 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 511 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 512 } 513 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 514 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 515 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 516 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 517 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 518 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 519 /* check saddle point solution */ 520 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 521 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 522 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 523 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 524 /* shift by the identity matrix */ 525 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 526 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 527 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 528 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 529 } 530 } 531 /* call assembling routines for local coarse basis */ 532 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 533 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 534 if (pcbddc->switch_static || pcbddc->dbg_flag) { 535 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 537 } 538 539 /* compute other basis functions for non-symmetric problems */ 540 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 541 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 542 PetscBool setsym; 543 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 544 if (!setsym) pcbddc->issym = PETSC_FALSE; 545 } 546 547 if (!pcbddc->issym) { 548 if (!pcbddc->coarse_psi_B) { 549 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 550 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 551 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 552 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 553 } 554 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 555 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 556 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 557 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 558 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 559 } 560 for (i=0;i<pcbddc->local_primal_size;i++) { 561 if (n_constraints) { 562 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 563 for (j=0;j<n_constraints;j++) { 564 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 565 } 566 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 567 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 568 } 569 if (i<n_vertices) { 570 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 571 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 572 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 573 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 574 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 575 if (n_constraints) { 576 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 577 } 578 } else { 579 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 580 } 581 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 582 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 583 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 584 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 585 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 586 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 587 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 588 if (i<n_vertices) { 589 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 590 } 591 if (pcbddc->switch_static || pcbddc->dbg_flag) { 592 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 593 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 594 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 595 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 596 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 597 } 598 599 if (pcbddc->dbg_flag) { 600 /* assemble subdomain vector on nodes */ 601 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 602 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 603 if (n_R) { 604 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 605 } 606 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 607 if (i<n_vertices) { 608 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 609 } 610 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 611 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 612 /* assemble subdomain vector of lagrange multipliers */ 613 for (j=0;j<pcbddc->local_primal_size;j++) { 614 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 615 } 616 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 617 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 618 /* check saddle point solution */ 619 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 620 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 621 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 622 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 623 /* shift by the identity matrix */ 624 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 625 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 626 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 627 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 628 } 629 } 630 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 631 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 632 if (pcbddc->switch_static || pcbddc->dbg_flag) { 633 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 634 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 635 } 636 unsymmetric_check = PETSC_TRUE; 637 } else { /* take references to already computed coarse basis */ 638 unsymmetric_check = PETSC_FALSE; 639 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 640 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 641 if (pcbddc->coarse_phi_D) { 642 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 643 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 644 } 645 } 646 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 647 /* Checking coarse_sub_mat and coarse basis functios */ 648 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 649 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 650 if (pcbddc->dbg_flag) { 651 Mat coarse_sub_mat; 652 Mat AUXMAT,TM1,TM2,TM3,TM4; 653 Mat coarse_phi_D,coarse_phi_B; 654 Mat coarse_psi_D,coarse_psi_B; 655 Mat A_II,A_BB,A_IB,A_BI; 656 MatType checkmattype=MATSEQAIJ; 657 PetscReal real_value; 658 659 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 660 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 661 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 662 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 663 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 664 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 665 if (unsymmetric_check) { 666 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 667 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 668 } 669 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 670 671 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 672 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 673 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 674 if (unsymmetric_check) { 675 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 676 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 677 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 678 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 679 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 680 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 681 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 682 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 683 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 684 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 685 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 686 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 687 } else { 688 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 689 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 690 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 691 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 692 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 693 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 694 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 695 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 696 } 697 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 698 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 699 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 700 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 701 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 702 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 703 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 708 for (i=0;i<pcbddc->local_primal_size;i++) { 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 710 } 711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 712 for (i=0;i<pcbddc->local_primal_size;i++) { 713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 714 } 715 if (unsymmetric_check) { 716 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 717 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 719 } 720 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 721 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 722 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 723 } 724 } 725 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 726 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 727 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 730 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 731 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 732 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 733 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 734 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 735 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 736 if (unsymmetric_check) { 737 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 738 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 739 } 740 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 741 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 742 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 743 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 744 } 745 /* free memory */ 746 if (n_vertices) { 747 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 748 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 749 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 750 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 751 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 752 } 753 if (n_constraints) { 754 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 755 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 756 ierr = MatDestroy(&M1);CHKERRQ(ierr); 757 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 758 } 759 ierr = PetscFree(auxindices);CHKERRQ(ierr); 760 /* get back data */ 761 *coarse_submat_vals_n = coarse_submat_vals; 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNCT__ 766 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 767 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 768 { 769 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 770 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 771 Mat *change_mat_all; 772 IS is_local,is_global; 773 PetscBool sorted,isseqaij; 774 PetscInt local_size,*idxs_perm; 775 PetscErrorCode ierr; 776 777 PetscFunctionBegin; 778 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 779 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 780 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 781 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 782 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 783 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 784 if (!sorted) { 785 const PetscInt *idxs; 786 PetscInt *idxs_sorted,i; 787 788 ierr = PetscMalloc1(local_size,&idxs_perm);CHKERRQ(ierr); 789 ierr = PetscMalloc1(local_size,&idxs_sorted);CHKERRQ(ierr); 790 for (i=0;i<local_size;i++) { 791 idxs_perm[i] = i; 792 } 793 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 794 ierr = PetscSortIntWithPermutation(local_size,idxs,idxs_perm);CHKERRQ(ierr); 795 for (i=0;i<local_size;i++) { 796 idxs_sorted[i] = idxs[idxs_perm[i]]; 797 } 798 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 799 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 800 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 801 } 802 803 /* get change of basis on the whole set of local dofs */ 804 ierr = MatGetSubMatrices(ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 805 806 if (!sorted) { 807 Mat new_mat; 808 IS is_perm; 809 PetscInt *idxs,i; 810 811 ierr = PetscMalloc1(local_size,&idxs);CHKERRQ(ierr); 812 for (i=0;i<local_size;i++) { 813 idxs[idxs_perm[i]] = i; 814 } 815 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 816 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 817 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 818 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 819 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 820 change_mat_all[0] = new_mat; 821 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 822 } 823 824 /* check */ 825 if (pcbddc->dbg_flag) { 826 Vec x,x_change; 827 PetscReal error; 828 829 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 830 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 831 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 832 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 833 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = MatMult(change_mat_all[0],matis->x,matis->y);CHKERRQ(ierr); 835 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 836 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 837 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 838 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 839 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 840 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 841 ierr = VecDestroy(&x);CHKERRQ(ierr); 842 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 843 } 844 845 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 846 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 847 if (isseqaij) { 848 ierr = MatPtAP(matis->A,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 849 } else { 850 Mat work_mat; 851 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 852 ierr = MatPtAP(work_mat,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 853 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 854 } 855 /* 856 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 857 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 858 */ 859 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 860 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 866 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 867 { 868 PC_IS* pcis = (PC_IS*)(pc->data); 869 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 870 IS is_aux1,is_aux2; 871 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 872 PetscInt n_vertices,i,j,n_R,n_D,n_B; 873 PetscInt vbs,bs; 874 PetscBT bitmask; 875 PetscErrorCode ierr; 876 877 PetscFunctionBegin; 878 /* 879 No need to setup local scatters if 880 - primal space is unchanged 881 AND 882 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 883 AND 884 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 885 */ 886 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 887 PetscFunctionReturn(0); 888 } 889 /* destroy old objects */ 890 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 891 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 892 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 893 /* Set Non-overlapping dimensions */ 894 n_B = pcis->n_B; n_D = pcis->n - n_B; 895 n_vertices = pcbddc->n_actual_vertices; 896 /* create auxiliary bitmask */ 897 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 898 for (i=0;i<n_vertices;i++) { 899 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 900 } 901 902 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 903 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 904 for (i=0, n_R=0; i<pcis->n; i++) { 905 if (!PetscBTLookup(bitmask,i)) { 906 idx_R_local[n_R] = i; 907 n_R++; 908 } 909 } 910 911 /* Block code */ 912 vbs = 1; 913 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 914 if (bs>1 && !(n_vertices%bs)) { 915 PetscBool is_blocked = PETSC_TRUE; 916 PetscInt *vary; 917 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 918 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 919 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 920 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 921 for (i=0; i<n_vertices; i++) { 922 if (vary[i]!=0 && vary[i]!=bs) { 923 is_blocked = PETSC_FALSE; 924 break; 925 } 926 } 927 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 928 vbs = bs; 929 for (i=0;i<n_R/vbs;i++) { 930 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 931 } 932 } 933 ierr = PetscFree(vary);CHKERRQ(ierr); 934 } 935 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 936 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 937 938 /* print some info if requested */ 939 if (pcbddc->dbg_flag) { 940 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 941 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 942 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 943 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 944 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 947 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 948 } 949 950 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 951 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 952 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 953 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 954 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 955 for (i=0; i<n_D; i++) { 956 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 957 } 958 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 959 for (i=0, j=0; i<n_R; i++) { 960 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 961 aux_array1[j++] = i; 962 } 963 } 964 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 965 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 966 for (i=0, j=0; i<n_B; i++) { 967 if (!PetscBTLookup(bitmask,is_indices[i])) { 968 aux_array2[j++] = i; 969 } 970 } 971 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 972 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 973 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 974 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 975 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 976 977 if (pcbddc->switch_static || pcbddc->dbg_flag) { 978 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 979 for (i=0, j=0; i<n_R; i++) { 980 if (PetscBTLookup(bitmask,idx_R_local[i])) { 981 aux_array1[j++] = i; 982 } 983 } 984 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 985 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 986 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 987 } 988 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 989 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 996 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 997 { 998 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 999 PC_IS *pcis = (PC_IS*)pc->data; 1000 PC pc_temp; 1001 Mat A_RR; 1002 MatReuse reuse; 1003 PetscScalar m_one = -1.0; 1004 PetscReal value; 1005 PetscInt n_D,n_R,ibs,mbs; 1006 PetscBool use_exact,use_exact_reduced,issbaij; 1007 PetscErrorCode ierr; 1008 /* prefixes stuff */ 1009 char dir_prefix[256],neu_prefix[256],str_level[16]; 1010 size_t len; 1011 1012 PetscFunctionBegin; 1013 1014 /* compute prefixes */ 1015 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1016 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1017 if (!pcbddc->current_level) { 1018 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1019 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1020 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1021 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1022 } else { 1023 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1024 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1025 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1026 len -= 15; /* remove "pc_bddc_coarse_" */ 1027 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1028 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1029 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1030 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1031 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1032 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1033 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1034 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1035 } 1036 1037 /* DIRICHLET PROBLEM */ 1038 if (dirichlet) { 1039 /* Matrix for Dirichlet problem is pcis->A_II */ 1040 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1041 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1042 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1043 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1044 /* default */ 1045 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1046 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1047 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1048 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1049 if (issbaij) { 1050 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1051 } else { 1052 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1053 } 1054 /* Allow user's customization */ 1055 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1056 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1057 } 1058 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1059 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1060 if (!n_D) { 1061 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1062 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1063 } 1064 /* Set Up KSP for Dirichlet problem of BDDC */ 1065 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1066 /* set ksp_D into pcis data */ 1067 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1068 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1069 pcis->ksp_D = pcbddc->ksp_D; 1070 } 1071 1072 /* NEUMANN PROBLEM */ 1073 A_RR = 0; 1074 if (neumann) { 1075 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1076 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1077 if (pcbddc->ksp_R) { /* already created ksp */ 1078 PetscInt nn_R; 1079 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1080 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1081 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1082 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1083 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1084 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1085 reuse = MAT_INITIAL_MATRIX; 1086 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1087 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1088 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1089 reuse = MAT_INITIAL_MATRIX; 1090 } else { /* safe to reuse the matrix */ 1091 reuse = MAT_REUSE_MATRIX; 1092 } 1093 } 1094 /* last check */ 1095 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1096 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1097 reuse = MAT_INITIAL_MATRIX; 1098 } 1099 } else { /* first time, so we need to create the matrix */ 1100 reuse = MAT_INITIAL_MATRIX; 1101 } 1102 /* extract A_RR */ 1103 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1104 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1105 if (ibs != mbs) { 1106 Mat newmat; 1107 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1108 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1109 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1110 } else { 1111 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1112 } 1113 if (!pcbddc->ksp_R) { /* create object if not present */ 1114 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1115 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1116 /* default */ 1117 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1118 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1119 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1120 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1121 if (issbaij) { 1122 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1123 } else { 1124 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1125 } 1126 /* Allow user's customization */ 1127 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1128 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1129 } 1130 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1131 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1132 if (!n_R) { 1133 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1134 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1135 } 1136 /* Set Up KSP for Neumann problem of BDDC */ 1137 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1138 } 1139 1140 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1141 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1142 if (pcbddc->dbg_flag) { 1143 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1144 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1146 } 1147 if (dirichlet) { /* Dirichlet */ 1148 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1149 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1150 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1151 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1152 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1153 /* need to be adapted? */ 1154 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1155 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1156 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1157 /* print info */ 1158 if (pcbddc->dbg_flag) { 1159 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1160 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1161 } 1162 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1163 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1164 } 1165 } 1166 if (neumann) { /* Neumann */ 1167 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1168 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1169 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1170 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1171 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1172 /* need to be adapted? */ 1173 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1174 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1175 /* print info */ 1176 if (pcbddc->dbg_flag) { 1177 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1178 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1179 } 1180 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1181 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1182 } 1183 } 1184 } 1185 /* free Neumann problem's matrix */ 1186 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1192 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1193 { 1194 PetscErrorCode ierr; 1195 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1196 1197 PetscFunctionBegin; 1198 if (applytranspose) { 1199 if (pcbddc->local_auxmat1) { 1200 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1201 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1202 } 1203 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1204 } else { 1205 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1206 if (pcbddc->local_auxmat1) { 1207 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1208 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1209 } 1210 } 1211 PetscFunctionReturn(0); 1212 } 1213 1214 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1215 #undef __FUNCT__ 1216 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1217 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1218 { 1219 PetscErrorCode ierr; 1220 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1221 PC_IS* pcis = (PC_IS*) (pc->data); 1222 const PetscScalar zero = 0.0; 1223 1224 PetscFunctionBegin; 1225 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1226 if (applytranspose) { 1227 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1228 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1229 } else { 1230 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1231 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1232 } 1233 /* start communications from local primal nodes to rhs of coarse solver */ 1234 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1235 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1236 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1237 1238 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1239 /* TODO remove null space when doing multilevel */ 1240 if (pcbddc->coarse_ksp) { 1241 if (applytranspose) { 1242 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1243 } else { 1244 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1245 } 1246 } 1247 1248 /* Local solution on R nodes */ 1249 if (pcis->n) { 1250 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1251 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1252 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1253 if (pcbddc->switch_static) { 1254 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1255 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 } 1257 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1258 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1259 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 if (pcbddc->switch_static) { 1262 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 } 1265 } 1266 1267 /* communications from coarse sol to local primal nodes */ 1268 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1269 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1270 1271 /* Sum contributions from two levels */ 1272 if (applytranspose) { 1273 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1274 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1275 } else { 1276 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1277 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1278 } 1279 PetscFunctionReturn(0); 1280 } 1281 1282 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1285 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1286 { 1287 PetscErrorCode ierr; 1288 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1289 PetscScalar *array,*array2; 1290 Vec from,to; 1291 1292 PetscFunctionBegin; 1293 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1294 from = pcbddc->coarse_vec; 1295 to = pcbddc->vec1_P; 1296 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1297 Vec tvec; 1298 PetscInt lsize; 1299 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1300 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1301 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1302 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1303 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1304 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1305 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1306 } 1307 } else { /* from local to global -> put data in coarse right hand side */ 1308 from = pcbddc->vec1_P; 1309 to = pcbddc->coarse_vec; 1310 } 1311 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1317 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1318 { 1319 PetscErrorCode ierr; 1320 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1321 PetscScalar *array,*array2; 1322 Vec from,to; 1323 1324 PetscFunctionBegin; 1325 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1326 from = pcbddc->coarse_vec; 1327 to = pcbddc->vec1_P; 1328 } else { /* from local to global -> put data in coarse right hand side */ 1329 from = pcbddc->vec1_P; 1330 to = pcbddc->coarse_vec; 1331 } 1332 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1333 if (smode == SCATTER_FORWARD) { 1334 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1335 Vec tvec; 1336 PetscInt lsize; 1337 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1338 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1339 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1340 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1341 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1342 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1343 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1344 } 1345 } 1346 PetscFunctionReturn(0); 1347 } 1348 1349 /* uncomment for testing purposes */ 1350 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1353 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1354 { 1355 PetscErrorCode ierr; 1356 PC_IS* pcis = (PC_IS*)(pc->data); 1357 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1358 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1359 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1360 MatType impMatType=MATSEQAIJ; 1361 /* one and zero */ 1362 PetscScalar one=1.0,zero=0.0; 1363 /* space to store constraints and their local indices */ 1364 PetscScalar *temp_quadrature_constraint; 1365 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1366 /* iterators */ 1367 PetscInt i,j,k,total_counts,temp_start_ptr; 1368 /* stuff to store connected components stored in pcbddc->mat_graph */ 1369 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1370 PetscInt n_ISForFaces,n_ISForEdges; 1371 /* near null space stuff */ 1372 MatNullSpace nearnullsp; 1373 const Vec *nearnullvecs; 1374 Vec *localnearnullsp; 1375 PetscBool nnsp_has_cnst; 1376 PetscInt nnsp_size; 1377 PetscScalar *array; 1378 /* BLAS integers */ 1379 PetscBLASInt lwork,lierr; 1380 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1381 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1382 /* LAPACK working arrays for SVD or POD */ 1383 PetscBool skip_lapack; 1384 PetscScalar *work; 1385 PetscReal *singular_vals; 1386 #if defined(PETSC_USE_COMPLEX) 1387 PetscReal *rwork; 1388 #endif 1389 #if defined(PETSC_MISSING_LAPACK_GESVD) 1390 PetscBLASInt Blas_one_2=1; 1391 PetscScalar *temp_basis,*correlation_mat; 1392 #else 1393 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1394 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1395 #endif 1396 /* reuse */ 1397 PetscInt olocal_primal_size; 1398 PetscInt *oprimal_indices_local_idxs; 1399 /* change of basis */ 1400 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1401 PetscBool boolforchange,qr_needed; 1402 PetscBT touched,change_basis,qr_needed_idx; 1403 /* auxiliary stuff */ 1404 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1405 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1406 PetscScalar *temp_quadrature_constraint_work; 1407 /* some quantities */ 1408 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1409 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1410 1411 1412 PetscFunctionBegin; 1413 /* Destroy Mat objects computed previously */ 1414 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1415 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1416 /* Get index sets for faces, edges and vertices from graph */ 1417 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1418 /* free unneeded index sets */ 1419 if (!pcbddc->use_vertices) { 1420 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1421 } 1422 if (!pcbddc->use_edges) { 1423 for (i=0;i<n_ISForEdges;i++) { 1424 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1427 n_ISForEdges = 0; 1428 } 1429 if (!pcbddc->use_faces) { 1430 for (i=0;i<n_ISForFaces;i++) { 1431 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1432 } 1433 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1434 n_ISForFaces = 0; 1435 } 1436 /* HACKS (the following two blocks of code) */ 1437 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1438 pcbddc->use_change_of_basis = PETSC_TRUE; 1439 if (!ISForEdges) { 1440 pcbddc->use_change_on_faces = PETSC_TRUE; 1441 } 1442 } 1443 if (pcbddc->NullSpace) { 1444 /* use_change_of_basis should be consistent among processors */ 1445 PetscBool tbool[2],gbool[2]; 1446 tbool [0] = pcbddc->use_change_of_basis; 1447 tbool [1] = pcbddc->use_change_on_faces; 1448 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1449 pcbddc->use_change_of_basis = gbool[0]; 1450 pcbddc->use_change_on_faces = gbool[1]; 1451 } 1452 /* print some info */ 1453 if (pcbddc->dbg_flag) { 1454 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1455 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1456 i = 0; 1457 if (ISForVertices) { 1458 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1459 } 1460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1463 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1464 } 1465 /* check if near null space is attached to global mat */ 1466 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1467 if (nearnullsp) { 1468 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1469 /* remove any stored info */ 1470 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1471 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1472 /* store information for BDDC solver reuse */ 1473 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1474 pcbddc->onearnullspace = nearnullsp; 1475 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1476 for (i=0;i<nnsp_size;i++) { 1477 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1478 } 1479 } else { /* if near null space is not provided BDDC uses constants by default */ 1480 nnsp_size = 0; 1481 nnsp_has_cnst = PETSC_TRUE; 1482 } 1483 /* get max number of constraints on a single cc */ 1484 max_constraints = nnsp_size; 1485 if (nnsp_has_cnst) max_constraints++; 1486 1487 /* 1488 Evaluate maximum storage size needed by the procedure 1489 - temp_indices will contain start index of each constraint stored as follows 1490 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1491 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1492 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1493 */ 1494 total_counts = n_ISForFaces+n_ISForEdges; 1495 total_counts *= max_constraints; 1496 n_vertices = 0; 1497 if (ISForVertices) { 1498 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1499 } 1500 total_counts += n_vertices; 1501 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1502 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1503 total_counts = 0; 1504 max_size_of_constraint = 0; 1505 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1506 if (i<n_ISForEdges) { 1507 used_IS = &ISForEdges[i]; 1508 } else { 1509 used_IS = &ISForFaces[i-n_ISForEdges]; 1510 } 1511 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1512 total_counts += j; 1513 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1514 } 1515 total_counts *= max_constraints; 1516 total_counts += n_vertices; 1517 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1518 /* get local part of global near null space vectors */ 1519 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1520 for (k=0;k<nnsp_size;k++) { 1521 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1522 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 } 1525 1526 /* whether or not to skip lapack calls */ 1527 skip_lapack = PETSC_TRUE; 1528 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1529 1530 /* allocate some auxiliary stuff */ 1531 if (!skip_lapack || pcbddc->use_qr_single) { 1532 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1533 } 1534 1535 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1536 if (!skip_lapack) { 1537 PetscScalar temp_work; 1538 1539 #if defined(PETSC_MISSING_LAPACK_GESVD) 1540 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1541 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1542 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1543 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1544 #if defined(PETSC_USE_COMPLEX) 1545 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1546 #endif 1547 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1548 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1549 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1550 lwork = -1; 1551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1552 #if !defined(PETSC_USE_COMPLEX) 1553 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1554 #else 1555 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1556 #endif 1557 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1558 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1559 #else /* on missing GESVD */ 1560 /* SVD */ 1561 PetscInt max_n,min_n; 1562 max_n = max_size_of_constraint; 1563 min_n = max_constraints; 1564 if (max_size_of_constraint < max_constraints) { 1565 min_n = max_size_of_constraint; 1566 max_n = max_constraints; 1567 } 1568 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1569 #if defined(PETSC_USE_COMPLEX) 1570 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1571 #endif 1572 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1573 lwork = -1; 1574 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1575 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1576 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1577 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1578 #if !defined(PETSC_USE_COMPLEX) 1579 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1580 #else 1581 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1582 #endif 1583 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1584 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1585 #endif /* on missing GESVD */ 1586 /* Allocate optimal workspace */ 1587 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1588 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1589 } 1590 /* Now we can loop on constraining sets */ 1591 total_counts = 0; 1592 temp_indices[0] = 0; 1593 /* vertices */ 1594 if (ISForVertices) { 1595 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1596 if (nnsp_has_cnst) { /* consider all vertices */ 1597 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1598 for (i=0;i<n_vertices;i++) { 1599 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1600 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1601 total_counts++; 1602 } 1603 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1604 PetscBool used_vertex; 1605 for (i=0;i<n_vertices;i++) { 1606 used_vertex = PETSC_FALSE; 1607 k = 0; 1608 while (!used_vertex && k<nnsp_size) { 1609 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1610 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1611 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1612 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1613 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1614 total_counts++; 1615 used_vertex = PETSC_TRUE; 1616 } 1617 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1618 k++; 1619 } 1620 } 1621 } 1622 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1623 n_vertices = total_counts; 1624 } 1625 1626 /* edges and faces */ 1627 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1628 if (ncc<n_ISForEdges) { 1629 used_IS = &ISForEdges[ncc]; 1630 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1631 } else { 1632 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1633 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1634 } 1635 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1636 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1637 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1638 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1639 /* change of basis should not be performed on local periodic nodes */ 1640 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1641 if (nnsp_has_cnst) { 1642 PetscScalar quad_value; 1643 temp_constraints++; 1644 if (!pcbddc->use_nnsp_true) { 1645 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1646 } else { 1647 quad_value = 1.0; 1648 } 1649 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1650 for (j=0;j<size_of_constraint;j++) { 1651 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1652 } 1653 /* sort by global ordering if using lapack subroutines */ 1654 if (!skip_lapack || pcbddc->use_qr_single) { 1655 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1656 for (j=0;j<size_of_constraint;j++) { 1657 permutation[j]=j; 1658 } 1659 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1660 for (j=0;j<size_of_constraint;j++) { 1661 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1662 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1663 } 1664 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1665 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1666 } 1667 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1668 total_counts++; 1669 } 1670 for (k=0;k<nnsp_size;k++) { 1671 PetscReal real_value; 1672 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1673 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1674 for (j=0;j<size_of_constraint;j++) { 1675 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1676 } 1677 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1678 /* check if array is null on the connected component */ 1679 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1680 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1681 if (real_value > 0.0) { /* keep indices and values */ 1682 /* sort by global ordering if using lapack subroutines */ 1683 if (!skip_lapack || pcbddc->use_qr_single) { 1684 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1685 for (j=0;j<size_of_constraint;j++) { 1686 permutation[j]=j; 1687 } 1688 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1689 for (j=0;j<size_of_constraint;j++) { 1690 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1691 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1692 } 1693 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1694 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1695 } 1696 temp_constraints++; 1697 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1698 total_counts++; 1699 } 1700 } 1701 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1702 valid_constraints = temp_constraints; 1703 if (!pcbddc->use_nnsp_true && temp_constraints) { 1704 if (temp_constraints == 1) { /* just normalize the constraint */ 1705 PetscScalar norm; 1706 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1707 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1708 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1709 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1710 } else { /* perform SVD */ 1711 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1712 1713 #if defined(PETSC_MISSING_LAPACK_GESVD) 1714 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1715 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1716 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1717 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1718 from that computed using LAPACKgesvd 1719 -> This is due to a different computation of eigenvectors in LAPACKheev 1720 -> The quality of the POD-computed basis will be the same */ 1721 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1722 /* Store upper triangular part of correlation matrix */ 1723 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1724 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1725 for (j=0;j<temp_constraints;j++) { 1726 for (k=0;k<j+1;k++) { 1727 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1728 } 1729 } 1730 /* compute eigenvalues and eigenvectors of correlation matrix */ 1731 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1732 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1733 #if !defined(PETSC_USE_COMPLEX) 1734 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1735 #else 1736 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1737 #endif 1738 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1739 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1740 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1741 j = 0; 1742 while (j < temp_constraints && singular_vals[j] < tol) j++; 1743 total_counts = total_counts-j; 1744 valid_constraints = temp_constraints-j; 1745 /* scale and copy POD basis into used quadrature memory */ 1746 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1747 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1748 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1749 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1750 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1751 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1752 if (j<temp_constraints) { 1753 PetscInt ii; 1754 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1755 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1756 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1757 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1758 for (k=0;k<temp_constraints-j;k++) { 1759 for (ii=0;ii<size_of_constraint;ii++) { 1760 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1761 } 1762 } 1763 } 1764 #else /* on missing GESVD */ 1765 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1766 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1767 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1768 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1769 #if !defined(PETSC_USE_COMPLEX) 1770 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1771 #else 1772 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1773 #endif 1774 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1775 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1776 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1777 k = temp_constraints; 1778 if (k > size_of_constraint) k = size_of_constraint; 1779 j = 0; 1780 while (j < k && singular_vals[k-j-1] < tol) j++; 1781 valid_constraints = k-j; 1782 total_counts = total_counts-temp_constraints+valid_constraints; 1783 #endif /* on missing GESVD */ 1784 } 1785 } 1786 /* setting change_of_basis flag is safe now */ 1787 if (boolforchange) { 1788 for (j=0;j<valid_constraints;j++) { 1789 PetscBTSet(change_basis,total_counts-j-1); 1790 } 1791 } 1792 } 1793 /* free index sets of faces, edges and vertices */ 1794 for (i=0;i<n_ISForFaces;i++) { 1795 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1796 } 1797 if (n_ISForFaces) { 1798 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1799 } 1800 for (i=0;i<n_ISForEdges;i++) { 1801 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1802 } 1803 if (n_ISForEdges) { 1804 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1805 } 1806 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1807 /* map temp_indices_to_constraint in boundary numbering */ 1808 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1809 if (i != temp_indices[total_counts]) { 1810 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1811 } 1812 1813 /* free workspace */ 1814 if (!skip_lapack || pcbddc->use_qr_single) { 1815 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 1816 } 1817 if (!skip_lapack) { 1818 ierr = PetscFree(work);CHKERRQ(ierr); 1819 #if defined(PETSC_USE_COMPLEX) 1820 ierr = PetscFree(rwork);CHKERRQ(ierr); 1821 #endif 1822 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1823 #if defined(PETSC_MISSING_LAPACK_GESVD) 1824 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1825 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1826 #endif 1827 } 1828 for (k=0;k<nnsp_size;k++) { 1829 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1830 } 1831 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1832 1833 /* set quantities in pcbddc data structure and store previous primal size */ 1834 /* n_vertices defines the number of subdomain corners in the primal space */ 1835 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1836 olocal_primal_size = pcbddc->local_primal_size; 1837 pcbddc->local_primal_size = total_counts; 1838 pcbddc->n_vertices = n_vertices; 1839 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1840 1841 /* Create constraint matrix */ 1842 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1843 /* so we need to set it up properly either with or without change of basis */ 1844 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1845 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1846 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1847 /* array to compute a local numbering of constraints : vertices first then constraints */ 1848 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1849 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1850 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1851 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1852 /* auxiliary stuff for basis change */ 1853 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1854 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1855 1856 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1857 total_primal_vertices=0; 1858 for (i=0;i<pcbddc->local_primal_size;i++) { 1859 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1860 if (size_of_constraint == 1) { 1861 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1862 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1863 aux_primal_minloc[total_primal_vertices]=0; 1864 total_primal_vertices++; 1865 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1866 PetscInt min_loc,min_index; 1867 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1868 /* find first untouched local node */ 1869 k = 0; 1870 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1871 min_index = global_indices[k]; 1872 min_loc = k; 1873 /* search the minimum among global nodes already untouched on the cc */ 1874 for (k=1;k<size_of_constraint;k++) { 1875 /* there can be more than one constraint on a single connected component */ 1876 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1877 min_index = global_indices[k]; 1878 min_loc = k; 1879 } 1880 } 1881 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1882 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1883 aux_primal_minloc[total_primal_vertices]=min_loc; 1884 total_primal_vertices++; 1885 } 1886 } 1887 /* determine if a QR strategy is needed for change of basis */ 1888 qr_needed = PETSC_FALSE; 1889 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1890 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1891 if (PetscBTLookup(change_basis,i)) { 1892 if (!pcbddc->use_qr_single) { 1893 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1894 j = 0; 1895 for (k=0;k<size_of_constraint;k++) { 1896 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1897 j++; 1898 } 1899 } 1900 /* found more than one primal dof on the cc */ 1901 if (j > 1) { 1902 PetscBTSet(qr_needed_idx,i); 1903 qr_needed = PETSC_TRUE; 1904 } 1905 } else { 1906 PetscBTSet(qr_needed_idx,i); 1907 qr_needed = PETSC_TRUE; 1908 } 1909 } 1910 } 1911 /* free workspace */ 1912 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1913 1914 /* permute indices in order to have a sorted set of vertices */ 1915 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1916 1917 /* nonzero structure of constraint matrix */ 1918 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1919 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1920 j=total_primal_vertices; 1921 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1922 if (!PetscBTLookup(change_basis,i)) { 1923 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1924 j++; 1925 } 1926 } 1927 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1928 ierr = PetscFree(nnz);CHKERRQ(ierr); 1929 /* set values in constraint matrix */ 1930 for (i=0;i<total_primal_vertices;i++) { 1931 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1932 } 1933 total_counts = total_primal_vertices; 1934 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1935 if (!PetscBTLookup(change_basis,i)) { 1936 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1937 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1938 total_counts++; 1939 } 1940 } 1941 /* assembling */ 1942 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1943 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1944 /* 1945 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1946 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1947 */ 1948 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1949 if (pcbddc->use_change_of_basis) { 1950 /* dual and primal dofs on a single cc */ 1951 PetscInt dual_dofs,primal_dofs; 1952 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1953 PetscInt primal_counter; 1954 /* working stuff for GEQRF */ 1955 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1956 PetscBLASInt lqr_work; 1957 /* working stuff for UNGQR */ 1958 PetscScalar *gqr_work,lgqr_work_t; 1959 PetscBLASInt lgqr_work; 1960 /* working stuff for TRTRS */ 1961 PetscScalar *trs_rhs; 1962 PetscBLASInt Blas_NRHS; 1963 /* pointers for values insertion into change of basis matrix */ 1964 PetscInt *start_rows,*start_cols; 1965 PetscScalar *start_vals; 1966 /* working stuff for values insertion */ 1967 PetscBT is_primal; 1968 /* matrix sizes */ 1969 PetscInt global_size,local_size; 1970 /* work array for nonzeros */ 1971 PetscScalar *nnz_array; 1972 /* temporary change of basis */ 1973 Mat localChangeOfBasisMatrix; 1974 /* auxiliary work for global change of basis */ 1975 Vec nnz_vec; 1976 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 1977 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 1978 PetscScalar *vals; 1979 PetscBool done; 1980 1981 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 1982 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 1983 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1984 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1985 1986 /* nonzeros for local mat */ 1987 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1988 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1989 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1990 if (PetscBTLookup(change_basis,i)) { 1991 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1992 if (PetscBTLookup(qr_needed_idx,i)) { 1993 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1994 } else { 1995 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1996 /* get local primal index on the cc */ 1997 j = 0; 1998 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1999 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2000 } 2001 } 2002 } 2003 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2004 /* Set initial identity in the matrix */ 2005 for (i=0;i<pcis->n_B;i++) { 2006 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2007 } 2008 2009 if (pcbddc->dbg_flag) { 2010 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2011 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2012 } 2013 2014 2015 /* Now we loop on the constraints which need a change of basis */ 2016 /* 2017 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2018 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2019 2020 Basic blocks of change of basis matrix T computed by 2021 2022 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2023 2024 | 1 0 ... 0 s_1/S | 2025 | 0 1 ... 0 s_2/S | 2026 | ... | 2027 | 0 ... 1 s_{n-1}/S | 2028 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2029 2030 with S = \sum_{i=1}^n s_i^2 2031 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2032 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2033 2034 - QR decomposition of constraints otherwise 2035 */ 2036 if (qr_needed) { 2037 /* space to store Q */ 2038 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2039 /* first we issue queries for optimal work */ 2040 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2041 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2042 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2043 lqr_work = -1; 2044 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2045 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2046 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2047 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2048 lgqr_work = -1; 2049 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2050 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2051 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2052 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2053 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2054 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2055 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2056 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2057 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2058 /* array to store scaling factors for reflectors */ 2059 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2060 /* array to store rhs and solution of triangular solver */ 2061 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2062 /* allocating workspace for check */ 2063 if (pcbddc->dbg_flag) { 2064 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2065 } 2066 } 2067 /* array to store whether a node is primal or not */ 2068 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2069 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2070 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2071 if (i != total_primal_vertices) { 2072 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2073 } 2074 for (i=0;i<total_primal_vertices;i++) { 2075 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2076 } 2077 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2078 2079 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2080 /* -> using implicit ordering contained in temp_indices data */ 2081 total_counts = pcbddc->n_vertices; 2082 primal_counter = total_counts; 2083 while (total_counts<pcbddc->local_primal_size) { 2084 primal_dofs = 1; 2085 if (PetscBTLookup(change_basis,total_counts)) { 2086 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2087 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2088 primal_dofs++; 2089 } 2090 /* get constraint info */ 2091 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2092 dual_dofs = size_of_constraint-primal_dofs; 2093 2094 if (pcbddc->dbg_flag) { 2095 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2096 } 2097 2098 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2099 2100 /* copy quadrature constraints for change of basis check */ 2101 if (pcbddc->dbg_flag) { 2102 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2103 } 2104 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2105 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2106 2107 /* compute QR decomposition of constraints */ 2108 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2109 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2110 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2111 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2112 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2113 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2114 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2115 2116 /* explictly compute R^-T */ 2117 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2118 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2119 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2120 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2121 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2122 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2123 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2124 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2125 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2126 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2127 2128 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2129 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2130 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2131 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2132 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2133 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2134 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2135 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2136 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2137 2138 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2139 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2140 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2141 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2142 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2143 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2144 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2145 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2146 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2147 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2148 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2149 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2150 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2151 2152 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2153 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2154 /* insert cols for primal dofs */ 2155 for (j=0;j<primal_dofs;j++) { 2156 start_vals = &qr_basis[j*size_of_constraint]; 2157 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2158 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2159 } 2160 /* insert cols for dual dofs */ 2161 for (j=0,k=0;j<dual_dofs;k++) { 2162 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2163 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2164 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2165 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2166 j++; 2167 } 2168 } 2169 2170 /* check change of basis */ 2171 if (pcbddc->dbg_flag) { 2172 PetscInt ii,jj; 2173 PetscBool valid_qr=PETSC_TRUE; 2174 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2175 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2176 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2177 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2178 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2179 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2180 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2181 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2182 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2183 for (jj=0;jj<size_of_constraint;jj++) { 2184 for (ii=0;ii<primal_dofs;ii++) { 2185 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2186 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2187 } 2188 } 2189 if (!valid_qr) { 2190 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2191 for (jj=0;jj<size_of_constraint;jj++) { 2192 for (ii=0;ii<primal_dofs;ii++) { 2193 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2194 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2195 } 2196 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2197 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2198 } 2199 } 2200 } 2201 } else { 2202 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2203 } 2204 } 2205 } else { /* simple transformation block */ 2206 PetscInt row,col; 2207 PetscScalar val,norm; 2208 2209 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2210 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2211 for (j=0;j<size_of_constraint;j++) { 2212 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2213 if (!PetscBTLookup(is_primal,row)) { 2214 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2215 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2216 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2217 } else { 2218 for (k=0;k<size_of_constraint;k++) { 2219 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2220 if (row != col) { 2221 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2222 } else { 2223 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2224 } 2225 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2226 } 2227 } 2228 } 2229 if (pcbddc->dbg_flag) { 2230 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2231 } 2232 } 2233 /* increment primal counter */ 2234 primal_counter += primal_dofs; 2235 } else { 2236 if (pcbddc->dbg_flag) { 2237 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2238 } 2239 } 2240 /* increment constraint counter total_counts */ 2241 total_counts += primal_dofs; 2242 } 2243 2244 /* free workspace */ 2245 if (qr_needed) { 2246 if (pcbddc->dbg_flag) { 2247 ierr = PetscFree(work);CHKERRQ(ierr); 2248 } 2249 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2250 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2251 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2252 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2253 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2254 } 2255 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2256 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2257 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2258 2259 /* assembling of global change of variable */ 2260 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2261 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2262 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2263 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2264 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2265 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2266 2267 /* nonzeros (overestimated) */ 2268 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2269 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2270 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2271 for (i=0;i<pcis->n;i++) { 2272 nnz_array[i] = 1.0; 2273 idxs_all[i] = i; 2274 } 2275 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2276 for (i=0;i<pcis->n_B;i++) { 2277 nnz_array[idxs_B[i]] = nnz[i]; 2278 } 2279 if (pcis->n) { 2280 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2281 } 2282 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2283 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2284 ierr = PetscFree(nnz);CHKERRQ(ierr); 2285 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2286 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2287 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2288 for (i=0;i<local_size;i++) { 2289 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2290 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2291 } 2292 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2293 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2294 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2295 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2296 2297 /* Set identity on dirichlet dofs */ 2298 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2299 for (i=0;i<pcis->n-pcis->n_B;i++) { 2300 PetscScalar one=1.0; 2301 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2302 } 2303 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2304 2305 /* Set values at interface dofs */ 2306 done = PETSC_TRUE; 2307 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2308 if (!done) { 2309 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2310 } 2311 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2312 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2313 ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2314 for (i=0;i<nvtxs;i++) { 2315 PetscInt row,*cols,ncols; 2316 PetscScalar *mat_vals; 2317 2318 row = idxs_B[i]; 2319 ncols = xadj[i+1]-xadj[i]; 2320 cols = idxs_mapped+xadj[i]; 2321 mat_vals = vals+xadj[i]; 2322 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2323 } 2324 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2325 if (!done) { 2326 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2327 } 2328 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2329 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2330 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2331 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2332 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2333 2334 /* check */ 2335 if (pcbddc->dbg_flag) { 2336 PetscReal error; 2337 Vec x,x_change; 2338 2339 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2340 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2341 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2342 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2343 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2344 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2345 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2346 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2347 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2348 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2349 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2350 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2351 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2352 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2353 ierr = VecDestroy(&x);CHKERRQ(ierr); 2354 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2355 } 2356 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2357 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2358 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2359 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2360 } 2361 2362 /* set up change of basis context */ 2363 if (pcbddc->ChangeOfBasisMatrix) { 2364 PCBDDCChange_ctx change_ctx; 2365 2366 if (!pcbddc->new_global_mat) { 2367 PetscInt global_size,local_size; 2368 2369 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2370 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2371 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2372 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2373 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2374 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2375 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2376 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2377 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2378 } else { 2379 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2380 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2381 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2382 } 2383 if (!pcbddc->user_ChangeOfBasisMatrix) { 2384 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2385 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2386 } else { 2387 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2388 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2389 } 2390 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2391 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2392 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2394 } 2395 2396 /* get indices in local ordering for vertices and constraints */ 2397 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2398 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2399 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2400 } 2401 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2402 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2403 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2404 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2405 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2406 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2407 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2408 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2409 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2410 /* set quantities in PCBDDC data struct */ 2411 pcbddc->n_actual_vertices = i; 2412 /* check if a new primal space has been introduced */ 2413 pcbddc->new_primal_space_local = PETSC_TRUE; 2414 if (olocal_primal_size == pcbddc->local_primal_size) { 2415 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2416 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2417 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2418 } 2419 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2420 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2421 2422 /* flush dbg viewer */ 2423 if (pcbddc->dbg_flag) { 2424 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2425 } 2426 2427 /* free workspace */ 2428 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2429 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2430 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2431 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2432 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2433 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 #undef __FUNCT__ 2438 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2439 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2440 { 2441 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2442 PC_IS *pcis = (PC_IS*)pc->data; 2443 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2444 PetscInt ierr,i,vertex_size; 2445 PetscViewer viewer=pcbddc->dbg_viewer; 2446 2447 PetscFunctionBegin; 2448 /* Reset previously computed graph */ 2449 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2450 /* Init local Graph struct */ 2451 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2452 2453 /* Check validity of the csr graph passed in by the user */ 2454 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2455 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2456 } 2457 2458 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2459 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2460 Mat mat_adj; 2461 PetscInt *xadj,*adjncy; 2462 PetscInt nvtxs; 2463 PetscBool flg_row=PETSC_TRUE; 2464 2465 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2466 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2467 if (!flg_row) { 2468 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2469 } 2470 if (pcbddc->use_local_adj) { 2471 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2472 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2473 } else { /* just compute subdomain's connected components */ 2474 IS is_dummy; 2475 ISLocalToGlobalMapping l2gmap_dummy; 2476 PetscInt j,sum; 2477 PetscInt *cxadj,*cadjncy; 2478 const PetscInt *idxs; 2479 PCBDDCGraph graph; 2480 PetscBT is_on_boundary; 2481 2482 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2483 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2484 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2485 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2486 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2487 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2488 graph->xadj = xadj; 2489 graph->adjncy = adjncy; 2490 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2491 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2492 2493 if (pcbddc->dbg_flag) { 2494 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2495 for (i=0;i<graph->ncc;i++) { 2496 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2497 } 2498 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2499 } 2500 2501 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2502 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2503 for (i=0;i<pcis->n_B;i++) { 2504 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2505 } 2506 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2507 2508 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2509 sum = 0; 2510 for (i=0;i<graph->ncc;i++) { 2511 PetscInt sizecc = 0; 2512 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2513 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2514 sizecc++; 2515 } 2516 } 2517 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2518 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2519 cxadj[graph->queue[j]] = sizecc; 2520 } 2521 } 2522 sum += sizecc*sizecc; 2523 } 2524 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2525 sum = 0; 2526 for (i=0;i<nvtxs;i++) { 2527 PetscInt temp = cxadj[i]; 2528 cxadj[i] = sum; 2529 sum += temp; 2530 } 2531 cxadj[nvtxs] = sum; 2532 for (i=0;i<graph->ncc;i++) { 2533 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2534 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2535 PetscInt k,sizecc = 0; 2536 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2537 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2538 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2539 sizecc++; 2540 } 2541 } 2542 } 2543 } 2544 } 2545 if (nvtxs) { 2546 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2547 } else { 2548 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2549 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2550 } 2551 graph->xadj = 0; 2552 graph->adjncy = 0; 2553 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2554 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2555 } 2556 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2557 if (!flg_row) { 2558 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2559 } 2560 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2561 } 2562 2563 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2564 vertex_size = 1; 2565 if (pcbddc->user_provided_isfordofs) { 2566 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2567 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2568 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2569 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2570 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2571 } 2572 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2573 pcbddc->n_ISForDofs = 0; 2574 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2575 } 2576 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2577 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2578 } else { 2579 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2580 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2581 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2582 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2583 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2584 } 2585 } 2586 } 2587 2588 /* Setup of Graph */ 2589 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2590 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2591 } 2592 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2593 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2594 } 2595 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2596 2597 /* Graph's connected components analysis */ 2598 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2599 2600 /* print some info to stdout */ 2601 if (pcbddc->dbg_flag) { 2602 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2603 } 2604 2605 /* mark topography has done */ 2606 pcbddc->recompute_topography = PETSC_FALSE; 2607 PetscFunctionReturn(0); 2608 } 2609 2610 #undef __FUNCT__ 2611 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2612 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2613 { 2614 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2615 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2616 PetscErrorCode ierr; 2617 2618 PetscFunctionBegin; 2619 n = 0; 2620 vertices = 0; 2621 if (pcbddc->ConstraintMatrix) { 2622 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2623 for (i=0;i<local_primal_size;i++) { 2624 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2625 if (size_of_constraint == 1) n++; 2626 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2627 } 2628 if (vertices_idx) { 2629 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2630 n = 0; 2631 for (i=0;i<local_primal_size;i++) { 2632 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2633 if (size_of_constraint == 1) { 2634 vertices[n++]=row_cmat_indices[0]; 2635 } 2636 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2637 } 2638 } 2639 } 2640 *n_vertices = n; 2641 if (vertices_idx) *vertices_idx = vertices; 2642 PetscFunctionReturn(0); 2643 } 2644 2645 #undef __FUNCT__ 2646 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2647 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2648 { 2649 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2650 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2651 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2652 PetscBT touched; 2653 PetscErrorCode ierr; 2654 2655 /* This function assumes that the number of local constraints per connected component 2656 is not greater than the number of nodes defined for the connected component 2657 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2658 PetscFunctionBegin; 2659 n = 0; 2660 constraints_index = 0; 2661 if (pcbddc->ConstraintMatrix) { 2662 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2663 max_size_of_constraint = 0; 2664 for (i=0;i<local_primal_size;i++) { 2665 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2666 if (size_of_constraint > 1) { 2667 n++; 2668 } 2669 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2670 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2671 } 2672 if (constraints_idx) { 2673 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2674 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2675 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2676 n = 0; 2677 for (i=0;i<local_primal_size;i++) { 2678 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2679 if (size_of_constraint > 1) { 2680 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2681 /* find first untouched local node */ 2682 j = 0; 2683 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2684 min_index = row_cmat_global_indices[j]; 2685 min_loc = j; 2686 /* search the minimum among nodes not yet touched on the connected component 2687 since there can be more than one constraint on a single cc */ 2688 for (j=1;j<size_of_constraint;j++) { 2689 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2690 min_index = row_cmat_global_indices[j]; 2691 min_loc = j; 2692 } 2693 } 2694 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2695 constraints_index[n++] = row_cmat_indices[min_loc]; 2696 } 2697 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2698 } 2699 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2700 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2701 } 2702 } 2703 *n_constraints = n; 2704 if (constraints_idx) *constraints_idx = constraints_index; 2705 PetscFunctionReturn(0); 2706 } 2707 2708 #undef __FUNCT__ 2709 #define __FUNCT__ "PCBDDCSubsetNumbering" 2710 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2711 { 2712 Vec local_vec,global_vec; 2713 IS seqis,paris; 2714 VecScatter scatter_ctx; 2715 PetscScalar *array; 2716 PetscInt *temp_global_dofs; 2717 PetscScalar globalsum; 2718 PetscInt i,j,s; 2719 PetscInt nlocals,first_index,old_index,max_local; 2720 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2721 PetscMPIInt *dof_sizes,*dof_displs; 2722 PetscBool first_found; 2723 PetscErrorCode ierr; 2724 2725 PetscFunctionBegin; 2726 /* mpi buffers */ 2727 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2728 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2729 j = ( !rank_prec_comm ? size_prec_comm : 0); 2730 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2731 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2732 /* get maximum size of subset */ 2733 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2734 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2735 max_local = 0; 2736 for (i=0;i<n_local_dofs;i++) { 2737 if (max_local < temp_global_dofs[i] ) { 2738 max_local = temp_global_dofs[i]; 2739 } 2740 } 2741 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2742 max_global++; 2743 max_local = 0; 2744 for (i=0;i<n_local_dofs;i++) { 2745 if (max_local < local_dofs[i] ) { 2746 max_local = local_dofs[i]; 2747 } 2748 } 2749 max_local++; 2750 /* allocate workspace */ 2751 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2752 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2753 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2754 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2755 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2756 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2757 /* create scatter */ 2758 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2759 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2760 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2761 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2762 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2763 /* init array */ 2764 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2765 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2766 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2767 if (local_dofs_mult) { 2768 for (i=0;i<n_local_dofs;i++) { 2769 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2770 } 2771 } else { 2772 for (i=0;i<n_local_dofs;i++) { 2773 array[local_dofs[i]]=1.0; 2774 } 2775 } 2776 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2777 /* scatter into global vec and get total number of global dofs */ 2778 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2779 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2780 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2781 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2782 /* Fill global_vec with cumulative function for global numbering */ 2783 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2784 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2785 nlocals = 0; 2786 first_index = -1; 2787 first_found = PETSC_FALSE; 2788 for (i=0;i<s;i++) { 2789 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2790 first_found = PETSC_TRUE; 2791 first_index = i; 2792 } 2793 nlocals += (PetscInt)PetscRealPart(array[i]); 2794 } 2795 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2796 if (!rank_prec_comm) { 2797 dof_displs[0]=0; 2798 for (i=1;i<size_prec_comm;i++) { 2799 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2800 } 2801 } 2802 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2803 if (first_found) { 2804 array[first_index] += (PetscScalar)nlocals; 2805 old_index = first_index; 2806 for (i=first_index+1;i<s;i++) { 2807 if (PetscRealPart(array[i]) > 0.1) { 2808 array[i] += array[old_index]; 2809 old_index = i; 2810 } 2811 } 2812 } 2813 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2814 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2815 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2816 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2817 /* get global ordering of local dofs */ 2818 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2819 if (local_dofs_mult) { 2820 for (i=0;i<n_local_dofs;i++) { 2821 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2822 } 2823 } else { 2824 for (i=0;i<n_local_dofs;i++) { 2825 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2826 } 2827 } 2828 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2829 /* free workspace */ 2830 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2831 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2832 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2833 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2834 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2835 /* return pointer to global ordering of local dofs */ 2836 *global_numbering_subset = temp_global_dofs; 2837 PetscFunctionReturn(0); 2838 } 2839 2840 #undef __FUNCT__ 2841 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2842 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2843 { 2844 PetscInt i,j; 2845 PetscScalar *alphas; 2846 PetscErrorCode ierr; 2847 2848 PetscFunctionBegin; 2849 /* this implements stabilized Gram-Schmidt */ 2850 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2851 for (i=0;i<n;i++) { 2852 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2853 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2854 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2855 } 2856 ierr = PetscFree(alphas);CHKERRQ(ierr); 2857 PetscFunctionReturn(0); 2858 } 2859 2860 #undef __FUNCT__ 2861 #define __FUNCT__ "MatISGetSubassemblingPattern" 2862 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2863 { 2864 Mat subdomain_adj; 2865 IS new_ranks,ranks_send_to; 2866 MatPartitioning partitioner; 2867 Mat_IS *matis; 2868 PetscInt n_neighs,*neighs,*n_shared,**shared; 2869 PetscInt prank; 2870 PetscMPIInt size,rank,color; 2871 PetscInt *xadj,*adjncy,*oldranks; 2872 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2873 PetscInt i,local_size,threshold=0; 2874 PetscErrorCode ierr; 2875 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2876 PetscSubcomm subcomm; 2877 2878 PetscFunctionBegin; 2879 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2880 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2881 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2882 2883 /* Get info on mapping */ 2884 matis = (Mat_IS*)(mat->data); 2885 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2886 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2887 2888 /* build local CSR graph of subdomains' connectivity */ 2889 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2890 xadj[0] = 0; 2891 xadj[1] = PetscMax(n_neighs-1,0); 2892 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2893 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2894 2895 if (threshold) { 2896 PetscInt xadj_count = 0; 2897 for (i=1;i<n_neighs;i++) { 2898 if (n_shared[i] > threshold) { 2899 adjncy[xadj_count] = neighs[i]; 2900 adjncy_wgt[xadj_count] = n_shared[i]; 2901 xadj_count++; 2902 } 2903 } 2904 xadj[1] = xadj_count; 2905 } else { 2906 if (xadj[1]) { 2907 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2908 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2909 } 2910 } 2911 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2912 if (use_square) { 2913 for (i=0;i<xadj[1];i++) { 2914 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2915 } 2916 } 2917 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2918 2919 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 2920 2921 /* 2922 Restrict work on active processes only. 2923 */ 2924 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2925 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2926 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2927 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2928 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2929 if (color) { 2930 ierr = PetscFree(xadj);CHKERRQ(ierr); 2931 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2932 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2933 } else { 2934 PetscInt coarsening_ratio; 2935 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2936 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2937 prank = rank; 2938 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2939 /* 2940 for (i=0;i<size;i++) { 2941 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2942 } 2943 */ 2944 for (i=0;i<xadj[1];i++) { 2945 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2946 } 2947 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2948 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2949 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2950 2951 /* Partition */ 2952 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2953 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2954 if (use_vwgt) { 2955 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 2956 v_wgt[0] = local_size; 2957 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2958 } 2959 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2960 coarsening_ratio = size/n_subdomains; 2961 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2962 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2963 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2964 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2965 2966 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2967 if (contiguous) { 2968 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2969 } else { 2970 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2971 } 2972 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2973 /* clean up */ 2974 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2975 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2976 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2977 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2978 } 2979 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2980 2981 /* assemble parallel IS for sends */ 2982 i = 1; 2983 if (color) i=0; 2984 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2985 2986 /* get back IS */ 2987 *is_sends = ranks_send_to; 2988 PetscFunctionReturn(0); 2989 } 2990 2991 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2992 2993 #undef __FUNCT__ 2994 #define __FUNCT__ "MatISSubassemble" 2995 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2996 { 2997 Mat local_mat; 2998 Mat_IS *matis; 2999 IS is_sends_internal; 3000 PetscInt rows,cols,new_local_rows; 3001 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3002 PetscBool ismatis,isdense,newisdense,destroy_mat; 3003 ISLocalToGlobalMapping l2gmap; 3004 PetscInt* l2gmap_indices; 3005 const PetscInt* is_indices; 3006 MatType new_local_type; 3007 /* buffers */ 3008 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3009 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3010 PetscInt *recv_buffer_idxs_local; 3011 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3012 /* MPI */ 3013 MPI_Comm comm,comm_n; 3014 PetscSubcomm subcomm; 3015 PetscMPIInt n_sends,n_recvs,commsize; 3016 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3017 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3018 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3019 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3020 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3021 PetscErrorCode ierr; 3022 3023 PetscFunctionBegin; 3024 /* TODO: add missing checks */ 3025 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3026 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3027 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3028 PetscValidLogicalCollectiveInt(mat,nis,7); 3029 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3030 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3031 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3032 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3033 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3034 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3035 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3036 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3037 PetscInt mrows,mcols,mnrows,mncols; 3038 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3039 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3040 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3041 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3042 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3043 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3044 } 3045 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3046 PetscValidLogicalCollectiveInt(mat,bs,0); 3047 /* prepare IS for sending if not provided */ 3048 if (!is_sends) { 3049 PetscBool pcontig = PETSC_TRUE; 3050 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3051 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3052 } else { 3053 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3054 is_sends_internal = is_sends; 3055 } 3056 3057 /* get pointer of MATIS data */ 3058 matis = (Mat_IS*)mat->data; 3059 3060 /* get comm */ 3061 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3062 3063 /* compute number of sends */ 3064 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3065 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3066 3067 /* compute number of receives */ 3068 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3069 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3070 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3071 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3072 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3073 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3074 ierr = PetscFree(iflags);CHKERRQ(ierr); 3075 3076 /* restrict comm if requested */ 3077 subcomm = 0; 3078 destroy_mat = PETSC_FALSE; 3079 if (restrict_comm) { 3080 PetscMPIInt color,rank,subcommsize; 3081 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3082 color = 0; 3083 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3084 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3085 subcommsize = commsize - subcommsize; 3086 /* check if reuse has been requested */ 3087 if (reuse == MAT_REUSE_MATRIX) { 3088 if (*mat_n) { 3089 PetscMPIInt subcommsize2; 3090 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3091 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3092 comm_n = PetscObjectComm((PetscObject)*mat_n); 3093 } else { 3094 comm_n = PETSC_COMM_SELF; 3095 } 3096 } else { /* MAT_INITIAL_MATRIX */ 3097 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3098 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3099 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3100 comm_n = subcomm->comm; 3101 } 3102 /* flag to destroy *mat_n if not significative */ 3103 if (color) destroy_mat = PETSC_TRUE; 3104 } else { 3105 comm_n = comm; 3106 } 3107 3108 /* prepare send/receive buffers */ 3109 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3110 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3111 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3112 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3113 if (nis) { 3114 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3115 } 3116 3117 /* Get data from local matrices */ 3118 if (!isdense) { 3119 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3120 /* TODO: See below some guidelines on how to prepare the local buffers */ 3121 /* 3122 send_buffer_vals should contain the raw values of the local matrix 3123 send_buffer_idxs should contain: 3124 - MatType_PRIVATE type 3125 - PetscInt size_of_l2gmap 3126 - PetscInt global_row_indices[size_of_l2gmap] 3127 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3128 */ 3129 } else { 3130 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3131 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3132 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3133 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3134 send_buffer_idxs[1] = i; 3135 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3136 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3137 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3138 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3139 for (i=0;i<n_sends;i++) { 3140 ilengths_vals[is_indices[i]] = len*len; 3141 ilengths_idxs[is_indices[i]] = len+2; 3142 } 3143 } 3144 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3145 /* additional is (if any) */ 3146 if (nis) { 3147 PetscMPIInt psum; 3148 PetscInt j; 3149 for (j=0,psum=0;j<nis;j++) { 3150 PetscInt plen; 3151 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3152 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3153 psum += len+1; /* indices + lenght */ 3154 } 3155 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3156 for (j=0,psum=0;j<nis;j++) { 3157 PetscInt plen; 3158 const PetscInt *is_array_idxs; 3159 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3160 send_buffer_idxs_is[psum] = plen; 3161 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3162 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3163 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3164 psum += plen+1; /* indices + lenght */ 3165 } 3166 for (i=0;i<n_sends;i++) { 3167 ilengths_idxs_is[is_indices[i]] = psum; 3168 } 3169 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3170 } 3171 3172 buf_size_idxs = 0; 3173 buf_size_vals = 0; 3174 buf_size_idxs_is = 0; 3175 for (i=0;i<n_recvs;i++) { 3176 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3177 buf_size_vals += (PetscInt)olengths_vals[i]; 3178 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3179 } 3180 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3181 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3182 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3183 3184 /* get new tags for clean communications */ 3185 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3186 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3187 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3188 3189 /* allocate for requests */ 3190 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3191 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3192 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3193 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3194 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3195 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3196 3197 /* communications */ 3198 ptr_idxs = recv_buffer_idxs; 3199 ptr_vals = recv_buffer_vals; 3200 ptr_idxs_is = recv_buffer_idxs_is; 3201 for (i=0;i<n_recvs;i++) { 3202 source_dest = onodes[i]; 3203 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3204 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3205 ptr_idxs += olengths_idxs[i]; 3206 ptr_vals += olengths_vals[i]; 3207 if (nis) { 3208 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3209 ptr_idxs_is += olengths_idxs_is[i]; 3210 } 3211 } 3212 for (i=0;i<n_sends;i++) { 3213 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3214 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3215 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3216 if (nis) { 3217 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3218 } 3219 } 3220 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3221 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3222 3223 /* assemble new l2g map */ 3224 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3225 ptr_idxs = recv_buffer_idxs; 3226 new_local_rows = 0; 3227 for (i=0;i<n_recvs;i++) { 3228 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3229 ptr_idxs += olengths_idxs[i]; 3230 } 3231 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3232 ptr_idxs = recv_buffer_idxs; 3233 new_local_rows = 0; 3234 for (i=0;i<n_recvs;i++) { 3235 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3236 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3237 ptr_idxs += olengths_idxs[i]; 3238 } 3239 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3240 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3241 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3242 3243 /* infer new local matrix type from received local matrices type */ 3244 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3245 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3246 if (n_recvs) { 3247 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3248 ptr_idxs = recv_buffer_idxs; 3249 for (i=0;i<n_recvs;i++) { 3250 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3251 new_local_type_private = MATAIJ_PRIVATE; 3252 break; 3253 } 3254 ptr_idxs += olengths_idxs[i]; 3255 } 3256 switch (new_local_type_private) { 3257 case MATDENSE_PRIVATE: 3258 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3259 new_local_type = MATSEQAIJ; 3260 bs = 1; 3261 } else { /* if I receive only 1 dense matrix */ 3262 new_local_type = MATSEQDENSE; 3263 bs = 1; 3264 } 3265 break; 3266 case MATAIJ_PRIVATE: 3267 new_local_type = MATSEQAIJ; 3268 bs = 1; 3269 break; 3270 case MATBAIJ_PRIVATE: 3271 new_local_type = MATSEQBAIJ; 3272 break; 3273 case MATSBAIJ_PRIVATE: 3274 new_local_type = MATSEQSBAIJ; 3275 break; 3276 default: 3277 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3278 break; 3279 } 3280 } else { /* by default, new_local_type is seqdense */ 3281 new_local_type = MATSEQDENSE; 3282 bs = 1; 3283 } 3284 3285 /* create MATIS object if needed */ 3286 if (reuse == MAT_INITIAL_MATRIX) { 3287 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3288 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3289 } else { 3290 /* it also destroys the local matrices */ 3291 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3292 } 3293 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3294 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3295 3296 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3297 3298 /* Global to local map of received indices */ 3299 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3300 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3301 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3302 3303 /* restore attributes -> type of incoming data and its size */ 3304 buf_size_idxs = 0; 3305 for (i=0;i<n_recvs;i++) { 3306 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3307 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3308 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3309 } 3310 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3311 3312 /* set preallocation */ 3313 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3314 if (!newisdense) { 3315 PetscInt *new_local_nnz=0; 3316 3317 ptr_vals = recv_buffer_vals; 3318 ptr_idxs = recv_buffer_idxs_local; 3319 if (n_recvs) { 3320 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3321 } 3322 for (i=0;i<n_recvs;i++) { 3323 PetscInt j; 3324 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3325 for (j=0;j<*(ptr_idxs+1);j++) { 3326 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3327 } 3328 } else { 3329 /* TODO */ 3330 } 3331 ptr_idxs += olengths_idxs[i]; 3332 } 3333 if (new_local_nnz) { 3334 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3335 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3336 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3337 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3338 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3339 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3340 } else { 3341 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3342 } 3343 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3344 } else { 3345 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3346 } 3347 3348 /* set values */ 3349 ptr_vals = recv_buffer_vals; 3350 ptr_idxs = recv_buffer_idxs_local; 3351 for (i=0;i<n_recvs;i++) { 3352 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3353 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3354 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3355 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3356 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3357 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3358 } else { 3359 /* TODO */ 3360 } 3361 ptr_idxs += olengths_idxs[i]; 3362 ptr_vals += olengths_vals[i]; 3363 } 3364 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3365 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3366 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3367 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3368 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3369 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3370 3371 #if 0 3372 if (!restrict_comm) { /* check */ 3373 Vec lvec,rvec; 3374 PetscReal infty_error; 3375 3376 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3377 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3378 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3379 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3380 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3381 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3382 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3383 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3384 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3385 } 3386 #endif 3387 3388 /* assemble new additional is (if any) */ 3389 if (nis) { 3390 PetscInt **temp_idxs,*count_is,j,psum; 3391 3392 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3393 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3394 ptr_idxs = recv_buffer_idxs_is; 3395 psum = 0; 3396 for (i=0;i<n_recvs;i++) { 3397 for (j=0;j<nis;j++) { 3398 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3399 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3400 psum += plen; 3401 ptr_idxs += plen+1; /* shift pointer to received data */ 3402 } 3403 } 3404 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3405 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3406 for (i=1;i<nis;i++) { 3407 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3408 } 3409 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3410 ptr_idxs = recv_buffer_idxs_is; 3411 for (i=0;i<n_recvs;i++) { 3412 for (j=0;j<nis;j++) { 3413 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3414 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3415 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3416 ptr_idxs += plen+1; /* shift pointer to received data */ 3417 } 3418 } 3419 for (i=0;i<nis;i++) { 3420 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3421 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3422 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3423 } 3424 ierr = PetscFree(count_is);CHKERRQ(ierr); 3425 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3426 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3427 } 3428 /* free workspace */ 3429 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3430 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3431 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3432 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3433 if (isdense) { 3434 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3435 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3436 } else { 3437 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3438 } 3439 if (nis) { 3440 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3441 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3442 } 3443 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3444 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3445 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3446 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3447 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3448 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3449 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3450 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3451 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3452 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3453 ierr = PetscFree(onodes);CHKERRQ(ierr); 3454 if (nis) { 3455 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3456 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3457 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3458 } 3459 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3460 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3461 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3462 for (i=0;i<nis;i++) { 3463 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3464 } 3465 } 3466 PetscFunctionReturn(0); 3467 } 3468 3469 /* temporary hack into ksp private data structure */ 3470 #include <petsc-private/kspimpl.h> 3471 3472 #undef __FUNCT__ 3473 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3474 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3475 { 3476 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3477 PC_IS *pcis = (PC_IS*)pc->data; 3478 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3479 MatNullSpace CoarseNullSpace=NULL; 3480 ISLocalToGlobalMapping coarse_islg; 3481 IS coarse_is,*isarray; 3482 PetscInt i,im_active=-1,active_procs=-1; 3483 PetscInt nis,nisdofs,nisneu; 3484 PC pc_temp; 3485 PCType coarse_pc_type; 3486 KSPType coarse_ksp_type; 3487 PetscBool multilevel_requested,multilevel_allowed; 3488 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3489 Mat t_coarse_mat_is; 3490 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3491 PetscMPIInt all_procs; 3492 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3493 PetscBool compute_vecs = PETSC_FALSE; 3494 PetscScalar *array; 3495 PetscErrorCode ierr; 3496 3497 PetscFunctionBegin; 3498 /* Assign global numbering to coarse dofs */ 3499 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3500 compute_vecs = PETSC_TRUE; 3501 PetscInt ocoarse_size; 3502 ocoarse_size = pcbddc->coarse_size; 3503 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3504 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3505 /* see if we can avoid some work */ 3506 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3507 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3508 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3509 coarse_reuse = PETSC_FALSE; 3510 } else { /* we can safely reuse already computed coarse matrix */ 3511 coarse_reuse = PETSC_TRUE; 3512 } 3513 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3514 coarse_reuse = PETSC_FALSE; 3515 } 3516 /* reset any subassembling information */ 3517 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3518 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3519 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3520 coarse_reuse = PETSC_TRUE; 3521 } 3522 3523 /* count "active" (i.e. with positive local size) and "void" processes */ 3524 im_active = !!(pcis->n); 3525 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3526 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3527 void_procs = all_procs-active_procs; 3528 csin_type_simple = PETSC_TRUE; 3529 redist = PETSC_FALSE; 3530 if (pcbddc->current_level && void_procs) { 3531 csin_ml = PETSC_TRUE; 3532 ncoarse_ml = void_procs; 3533 csin_ds = PETSC_TRUE; 3534 ncoarse_ds = void_procs; 3535 } else { 3536 csin_ml = PETSC_FALSE; 3537 ncoarse_ml = all_procs; 3538 if (void_procs) { 3539 csin_ds = PETSC_TRUE; 3540 ncoarse_ds = void_procs; 3541 csin_type_simple = PETSC_FALSE; 3542 } else { 3543 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3544 csin_ds = PETSC_TRUE; 3545 ncoarse_ds = pcbddc->redistribute_coarse; 3546 redist = PETSC_TRUE; 3547 } else { 3548 csin_ds = PETSC_FALSE; 3549 ncoarse_ds = all_procs; 3550 } 3551 } 3552 } 3553 3554 /* 3555 test if we can go multilevel: three conditions must be satisfied: 3556 - we have not exceeded the number of levels requested 3557 - we can actually subassemble the active processes 3558 - we can find a suitable number of MPI processes where we can place the subassembled problem 3559 */ 3560 multilevel_allowed = PETSC_FALSE; 3561 multilevel_requested = PETSC_FALSE; 3562 if (pcbddc->current_level < pcbddc->max_levels) { 3563 multilevel_requested = PETSC_TRUE; 3564 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3565 multilevel_allowed = PETSC_FALSE; 3566 } else { 3567 multilevel_allowed = PETSC_TRUE; 3568 } 3569 } 3570 /* determine number of process partecipating to coarse solver */ 3571 if (multilevel_allowed) { 3572 ncoarse = ncoarse_ml; 3573 csin = csin_ml; 3574 } else { 3575 ncoarse = ncoarse_ds; 3576 csin = csin_ds; 3577 } 3578 3579 /* creates temporary l2gmap and IS for coarse indexes */ 3580 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3581 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3582 3583 /* creates temporary MATIS object for coarse matrix */ 3584 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3585 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3586 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3587 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3588 #if 0 3589 { 3590 PetscViewer viewer; 3591 char filename[256]; 3592 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3593 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3594 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3595 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3596 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3597 } 3598 #endif 3599 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3600 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3601 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3602 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3603 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3604 3605 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3606 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3607 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3608 const PetscInt *idxs; 3609 ISLocalToGlobalMapping tmap; 3610 3611 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3612 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3613 /* allocate space for temporary storage */ 3614 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3615 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3616 /* allocate for IS array */ 3617 nisdofs = pcbddc->n_ISForDofsLocal; 3618 nisneu = !!pcbddc->NeumannBoundariesLocal; 3619 nis = nisdofs + nisneu; 3620 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3621 /* dofs splitting */ 3622 for (i=0;i<nisdofs;i++) { 3623 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3624 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3625 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3626 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3627 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3628 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3629 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3630 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3631 } 3632 /* neumann boundaries */ 3633 if (pcbddc->NeumannBoundariesLocal) { 3634 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3635 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3636 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3637 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3638 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3639 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3640 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3641 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3642 } 3643 /* free memory */ 3644 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3645 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3646 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3647 } else { 3648 nis = 0; 3649 nisdofs = 0; 3650 nisneu = 0; 3651 isarray = NULL; 3652 } 3653 /* destroy no longer needed map */ 3654 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3655 3656 /* restrict on coarse candidates (if needed) */ 3657 coarse_mat_is = NULL; 3658 if (csin) { 3659 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3660 if (redist) { 3661 PetscMPIInt rank; 3662 PetscInt spc,n_spc_p1,dest[1]; 3663 3664 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3665 spc = all_procs/pcbddc->redistribute_coarse; 3666 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 3667 if (rank > n_spc_p1*(spc+1)-1) { 3668 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3669 } else { 3670 dest[0] = rank/(spc+1); 3671 } 3672 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3673 } else { 3674 PetscInt j,tissize,*nisindices; 3675 PetscInt *coarse_candidates; 3676 const PetscInt* tisindices; 3677 /* get coarse candidates' ranks in pc communicator */ 3678 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 3679 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3680 for (i=0,j=0;i<all_procs;i++) { 3681 if (!coarse_candidates[i]) { 3682 coarse_candidates[j]=i; 3683 j++; 3684 } 3685 } 3686 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3687 /* get a suitable subassembling pattern */ 3688 if (csin_type_simple) { 3689 PetscMPIInt rank; 3690 PetscInt issize,isidx; 3691 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3692 if (im_active) { 3693 issize = 1; 3694 isidx = (PetscInt)rank; 3695 } else { 3696 issize = 0; 3697 isidx = -1; 3698 } 3699 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3700 } else { 3701 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3702 } 3703 if (pcbddc->dbg_flag) { 3704 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3705 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3706 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3707 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3708 for (i=0;i<j;i++) { 3709 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3710 } 3711 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3712 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3713 } 3714 /* shift the pattern on coarse candidates */ 3715 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3716 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3717 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 3718 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3719 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3720 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3721 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3722 } 3723 } 3724 if (pcbddc->dbg_flag) { 3725 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3726 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3727 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3728 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3729 } 3730 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3731 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3732 } else { 3733 if (pcbddc->dbg_flag) { 3734 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3735 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3736 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3737 } 3738 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3739 coarse_mat_is = t_coarse_mat_is; 3740 } 3741 3742 /* create local to global scatters for coarse problem */ 3743 if (compute_vecs) { 3744 PetscInt lrows; 3745 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3746 if (coarse_mat_is) { 3747 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3748 } else { 3749 lrows = 0; 3750 } 3751 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3752 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3753 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3754 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3755 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3756 } 3757 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3758 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3759 3760 /* set defaults for coarse KSP and PC */ 3761 if (multilevel_allowed) { 3762 coarse_ksp_type = KSPRICHARDSON; 3763 coarse_pc_type = PCBDDC; 3764 } else { 3765 coarse_ksp_type = KSPPREONLY; 3766 coarse_pc_type = PCREDUNDANT; 3767 } 3768 3769 /* print some info if requested */ 3770 if (pcbddc->dbg_flag) { 3771 if (!multilevel_allowed) { 3772 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3773 if (multilevel_requested) { 3774 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3775 } else if (pcbddc->max_levels) { 3776 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3777 } 3778 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3779 } 3780 } 3781 3782 /* create the coarse KSP object only once with defaults */ 3783 if (coarse_mat_is) { 3784 MatReuse coarse_mat_reuse; 3785 PetscViewer dbg_viewer = NULL; 3786 if (pcbddc->dbg_flag) { 3787 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3788 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3789 } 3790 if (!pcbddc->coarse_ksp) { 3791 char prefix[256],str_level[16]; 3792 size_t len; 3793 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3794 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3795 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3796 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3797 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3798 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3799 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3800 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3801 /* prefix */ 3802 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3803 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3804 if (!pcbddc->current_level) { 3805 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3806 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3807 } else { 3808 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3809 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3810 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3811 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3812 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3813 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3814 } 3815 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3816 /* allow user customization */ 3817 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3818 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3819 } 3820 3821 /* get some info after set from options */ 3822 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3823 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3824 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3825 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3826 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3827 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3828 isbddc = PETSC_FALSE; 3829 } 3830 if (isredundant) { 3831 KSP inner_ksp; 3832 PC inner_pc; 3833 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3834 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3835 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3836 } 3837 3838 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3839 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3840 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3841 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3842 if (nisdofs) { 3843 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3844 for (i=0;i<nisdofs;i++) { 3845 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3846 } 3847 } 3848 if (nisneu) { 3849 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3850 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3851 } 3852 3853 /* assemble coarse matrix */ 3854 if (coarse_reuse) { 3855 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3856 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3857 coarse_mat_reuse = MAT_REUSE_MATRIX; 3858 } else { 3859 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3860 } 3861 if (isbddc || isnn) { 3862 if (pcbddc->coarsening_ratio > 1) { 3863 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3864 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3865 if (pcbddc->dbg_flag) { 3866 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3867 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3868 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3869 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3870 } 3871 } 3872 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3873 } else { 3874 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 3875 coarse_mat = coarse_mat_is; 3876 } 3877 } else { 3878 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3879 } 3880 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3881 3882 /* propagate symmetry info to coarse matrix */ 3883 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3884 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3885 3886 /* set operators */ 3887 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3888 if (pcbddc->dbg_flag) { 3889 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3890 } 3891 } else { /* processes non partecipating to coarse solver (if any) */ 3892 coarse_mat = 0; 3893 } 3894 ierr = PetscFree(isarray);CHKERRQ(ierr); 3895 #if 0 3896 { 3897 PetscViewer viewer; 3898 char filename[256]; 3899 sprintf(filename,"coarse_mat.m"); 3900 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3901 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3902 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3903 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3904 } 3905 #endif 3906 3907 /* Compute coarse null space (special handling by BDDC only) */ 3908 if (pcbddc->NullSpace) { 3909 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3910 } 3911 3912 if (pcbddc->coarse_ksp) { 3913 Vec crhs,csol; 3914 PetscBool ispreonly; 3915 if (CoarseNullSpace) { 3916 if (isbddc) { 3917 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3918 } else { 3919 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3920 } 3921 } 3922 /* setup coarse ksp */ 3923 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3924 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3925 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3926 /* hack */ 3927 if (!csol) { 3928 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3929 } 3930 if (!crhs) { 3931 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3932 } 3933 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3934 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3935 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3936 KSP check_ksp; 3937 KSPType check_ksp_type; 3938 PC check_pc; 3939 Vec check_vec,coarse_vec; 3940 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3941 PetscInt its; 3942 PetscBool compute_eigs; 3943 PetscReal *eigs_r,*eigs_c; 3944 PetscInt neigs; 3945 const char *prefix; 3946 3947 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3948 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3949 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3950 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3951 if (ispreonly) { 3952 check_ksp_type = KSPPREONLY; 3953 compute_eigs = PETSC_FALSE; 3954 } else { 3955 check_ksp_type = KSPGMRES; 3956 compute_eigs = PETSC_TRUE; 3957 } 3958 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3959 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3960 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3961 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3962 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3963 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3964 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3965 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3966 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3967 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3968 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3969 /* create random vec */ 3970 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3971 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3972 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3973 if (CoarseNullSpace) { 3974 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3975 } 3976 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3977 /* solve coarse problem */ 3978 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3979 if (CoarseNullSpace) { 3980 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3981 } 3982 /* set eigenvalue estimation if preonly has not been requested */ 3983 if (compute_eigs) { 3984 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 3985 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 3986 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3987 lambda_max = eigs_r[neigs-1]; 3988 lambda_min = eigs_r[0]; 3989 if (pcbddc->use_coarse_estimates) { 3990 if (lambda_max>lambda_min) { 3991 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3992 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3993 } 3994 } 3995 } 3996 3997 /* check coarse problem residual error */ 3998 if (pcbddc->dbg_flag) { 3999 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4000 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4001 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4002 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4003 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4004 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4005 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4006 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4007 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4008 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4009 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4010 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4011 if (compute_eigs) { 4012 PetscReal lambda_max_s,lambda_min_s; 4013 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4014 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4015 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4016 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4017 for (i=0;i<neigs;i++) { 4018 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4019 } 4020 } 4021 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4022 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4023 } 4024 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4025 if (compute_eigs) { 4026 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4027 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4028 } 4029 } 4030 } 4031 /* print additional info */ 4032 if (pcbddc->dbg_flag) { 4033 /* waits until all processes reaches this point */ 4034 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4035 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4036 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4037 } 4038 4039 /* free memory */ 4040 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4041 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4042 PetscFunctionReturn(0); 4043 } 4044 4045 #undef __FUNCT__ 4046 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4047 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4048 { 4049 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4050 PC_IS* pcis = (PC_IS*)pc->data; 4051 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4052 PetscInt i,coarse_size; 4053 PetscInt *local_primal_indices; 4054 PetscErrorCode ierr; 4055 4056 PetscFunctionBegin; 4057 /* Compute global number of coarse dofs */ 4058 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4059 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4060 } 4061 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4062 4063 /* check numbering */ 4064 if (pcbddc->dbg_flag) { 4065 PetscScalar coarsesum,*array; 4066 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4067 4068 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4069 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4070 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4071 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4072 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4073 for (i=0;i<pcbddc->local_primal_size;i++) { 4074 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4075 } 4076 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4077 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4078 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4079 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4080 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4081 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4082 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4083 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4084 for (i=0;i<pcis->n;i++) { 4085 if (array[i] == 1.0) { 4086 set_error = PETSC_TRUE; 4087 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4088 } 4089 } 4090 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4091 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4092 for (i=0;i<pcis->n;i++) { 4093 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4094 } 4095 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4096 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4097 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4098 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4099 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4100 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4101 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4102 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4103 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4105 for (i=0;i<pcbddc->local_primal_size;i++) { 4106 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4107 } 4108 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4109 } 4110 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4111 if (set_error_reduced) { 4112 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4113 } 4114 } 4115 /* get back data */ 4116 *coarse_size_n = coarse_size; 4117 *local_primal_indices_n = local_primal_indices; 4118 PetscFunctionReturn(0); 4119 } 4120 4121 #undef __FUNCT__ 4122 #define __FUNCT__ "PCBDDCGlobalToLocal" 4123 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4124 { 4125 IS localis_t; 4126 PetscInt i,lsize,*idxs,n; 4127 PetscScalar *vals; 4128 PetscErrorCode ierr; 4129 4130 PetscFunctionBegin; 4131 /* get indices in local ordering exploiting local to global map */ 4132 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4133 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4134 for (i=0;i<lsize;i++) vals[i] = 1.0; 4135 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4136 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4137 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4138 if (idxs) { /* multilevel guard */ 4139 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4140 } 4141 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4142 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4143 ierr = PetscFree(vals);CHKERRQ(ierr); 4144 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4145 /* now compute set in local ordering */ 4146 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4147 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4148 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4149 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4150 for (i=0,lsize=0;i<n;i++) { 4151 if (PetscRealPart(vals[i]) > 0.5) { 4152 lsize++; 4153 } 4154 } 4155 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4156 for (i=0,lsize=0;i<n;i++) { 4157 if (PetscRealPart(vals[i]) > 0.5) { 4158 idxs[lsize++] = i; 4159 } 4160 } 4161 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4162 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4163 *localis = localis_t; 4164 PetscFunctionReturn(0); 4165 } 4166 4167 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4168 #undef __FUNCT__ 4169 #define __FUNCT__ "PCBDDCMatMult_Private" 4170 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4171 { 4172 PCBDDCChange_ctx change_ctx; 4173 PetscErrorCode ierr; 4174 4175 PetscFunctionBegin; 4176 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4177 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4178 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4179 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4180 PetscFunctionReturn(0); 4181 } 4182 4183 #undef __FUNCT__ 4184 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4185 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4186 { 4187 PCBDDCChange_ctx change_ctx; 4188 PetscErrorCode ierr; 4189 4190 PetscFunctionBegin; 4191 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4192 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4193 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4194 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4195 PetscFunctionReturn(0); 4196 } 4197