1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCSetUpSolvers" 10 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscScalar *coarse_submat_vals; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 /* Compute matrix after change of basis and extract local submatrices */ 18 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D */ 21 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 22 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 23 24 /* Setup local solvers ksp_D and ksp_R */ 25 /* PCBDDCSetUpLocalScatters should be called first! */ 26 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 27 28 /* Change global null space passed in by the user if change of basis has been requested */ 29 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 30 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 31 } 32 33 /* 34 Setup local correction and local part of coarse basis. 35 Gives back the dense local part of the coarse matrix in column major ordering 36 */ 37 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 38 39 /* Compute total number of coarse nodes and setup coarse solver */ 40 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 41 42 /* free */ 43 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 #undef __FUNCT__ 48 #define __FUNCT__ "PCBDDCResetCustomization" 49 PetscErrorCode PCBDDCResetCustomization(PC pc) 50 { 51 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 61 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 62 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 63 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 64 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 65 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCResetTopography" 71 PetscErrorCode PCBDDCResetTopography(PC pc) 72 { 73 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 78 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 79 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 80 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "PCBDDCResetSolvers" 86 PetscErrorCode PCBDDCResetSolvers(PC pc) 87 { 88 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 93 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 101 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 109 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 110 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 111 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 112 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 113 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 114 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 115 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 122 { 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PC_IS *pcis = (PC_IS*)pc->data; 125 VecType impVecType; 126 PetscInt n_constraints,n_R,old_size; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 if (!pcbddc->ConstraintMatrix) { 131 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 132 } 133 /* get sizes */ 134 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 135 n_R = pcis->n-pcbddc->n_actual_vertices; 136 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 137 /* local work vectors (try to avoid unneeded work)*/ 138 /* R nodes */ 139 old_size = -1; 140 if (pcbddc->vec1_R) { 141 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 142 } 143 if (n_R != old_size) { 144 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 146 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 147 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 148 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 149 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 150 } 151 /* local primal dofs */ 152 old_size = -1; 153 if (pcbddc->vec1_P) { 154 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 155 } 156 if (pcbddc->local_primal_size != old_size) { 157 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 158 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 159 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 160 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 161 } 162 /* local explicit constraints */ 163 old_size = -1; 164 if (pcbddc->vec1_C) { 165 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 166 } 167 if (n_constraints && n_constraints != old_size) { 168 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 169 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 170 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 171 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCBDDCSetUpCorrection" 178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 179 { 180 PetscErrorCode ierr; 181 /* pointers to pcis and pcbddc */ 182 PC_IS* pcis = (PC_IS*)pc->data; 183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 184 /* submatrices of local problem */ 185 Mat A_RV,A_VR,A_VV; 186 /* working matrices */ 187 Mat M1,M2,M3,C_CR; 188 /* working vectors */ 189 Vec vec1_C,vec2_C,vec1_V,vec2_V; 190 /* additional working stuff */ 191 IS is_aux; 192 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 193 const PetscScalar *array,*row_cmat_values; 194 const PetscInt *row_cmat_indices,*idx_R_local; 195 PetscInt *idx_V_B,*auxindices; 196 PetscInt n_vertices,n_constraints,size_of_constraint; 197 PetscInt i,j,n_R,n_D,n_B; 198 PetscBool unsymmetric_check; 199 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 200 MatType impMatType; 201 /* some shortcuts to scalars */ 202 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 203 /* for debugging purposes */ 204 PetscReal *coarsefunctions_errors,*constraints_errors; 205 206 PetscFunctionBegin; 207 /* get number of vertices (corners plus constraints with change of basis) 208 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 209 n_vertices = pcbddc->n_actual_vertices; 210 n_constraints = pcbddc->local_primal_size-n_vertices; 211 /* Set Non-overlapping dimensions */ 212 n_B = pcis->n_B; n_D = pcis->n - n_B; 213 n_R = pcis->n-n_vertices; 214 215 /* Set types for local objects needed by BDDC precondtioner */ 216 impMatType = MATSEQDENSE; 217 218 /* Allocating some extra storage just to be safe */ 219 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 220 for (i=0;i<pcis->n;i++) auxindices[i]=i; 221 222 /* vertices in boundary numbering */ 223 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 224 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 225 if (i != n_vertices) { 226 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 227 } 228 229 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 230 if (n_constraints) { 231 /* see if we can save some allocations */ 232 if (pcbddc->local_auxmat2) { 233 PetscInt on_R,on_constraints; 234 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 235 if (on_R != n_R || on_constraints != n_constraints) { 236 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 237 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 238 } 239 } 240 /* work vectors */ 241 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 242 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 243 /* auxiliary matrices */ 244 if (!pcbddc->local_auxmat2) { 245 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 246 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 247 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 249 } 250 251 /* Extract constraints on R nodes: C_{CR} */ 252 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 253 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 254 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 255 256 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 257 for (i=0;i<n_constraints;i++) { 258 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 259 /* Get row of constraint matrix in R numbering */ 260 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 262 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 263 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 264 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 265 /* Solve for row of constraint matrix in R numbering */ 266 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 267 /* Set values in local_auxmat2 */ 268 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 270 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 271 } 272 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 275 276 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 277 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 278 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 279 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 280 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 281 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 282 ierr = MatSetUp(M1);CHKERRQ(ierr); 283 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 284 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 285 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 286 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 287 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 288 ierr = MatDestroy(&M2);CHKERRQ(ierr); 289 ierr = MatDestroy(&M3);CHKERRQ(ierr); 290 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 291 if (!pcbddc->local_auxmat1) { 292 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } else { 294 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 295 } 296 } 297 298 /* Get submatrices from subdomain matrix */ 299 if (n_vertices) { 300 PetscInt ibs,mbs; 301 PetscBool issbaij; 302 Mat newmat; 303 304 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 305 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 306 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 307 if (ibs != mbs) { /* need to convert to SEQAIJ */ 308 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 310 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 311 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 313 } else { 314 /* this is safe */ 315 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 316 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 317 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 318 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 319 /* which of the two approaches is faster? */ 320 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 322 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 324 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 325 } else { 326 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 327 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 328 } 329 } 330 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 331 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 332 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 333 } 334 335 /* Matrix of coarse basis functions (local) */ 336 if (pcbddc->coarse_phi_B) { 337 PetscInt on_B,on_primal; 338 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 339 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 340 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 341 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 342 } 343 } 344 if (pcbddc->coarse_phi_D) { 345 PetscInt on_D,on_primal; 346 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 347 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 348 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 349 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 350 } 351 } 352 if (!pcbddc->coarse_phi_B) { 353 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 355 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 356 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 357 } 358 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 359 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 361 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 362 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 363 } 364 365 if (pcbddc->dbg_flag) { 366 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 367 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 368 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 369 } 370 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 371 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 372 373 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 374 375 /* vertices */ 376 for (i=0;i<n_vertices;i++) { 377 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 378 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 379 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 380 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 381 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 382 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 383 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 384 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 387 if (n_constraints) { 388 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 389 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 390 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 391 } 392 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 393 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 394 395 /* Set values in coarse basis function and subdomain part of coarse_mat */ 396 /* coarse basis functions */ 397 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 398 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 399 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 402 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 403 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 404 if (pcbddc->switch_static || pcbddc->dbg_flag) { 405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 } 411 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 412 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 415 if (n_constraints) { 416 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 419 } 420 421 /* check */ 422 if (pcbddc->dbg_flag) { 423 /* assemble subdomain vector on local nodes */ 424 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 425 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 if (n_R) { 427 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 428 } 429 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 430 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 432 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 433 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 434 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 435 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 437 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 438 if (n_constraints) { 439 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 442 } 443 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 444 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 445 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 446 /* check saddle point solution */ 447 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 448 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 449 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 450 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 451 /* shift by the identity matrix */ 452 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 453 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 455 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 456 } 457 } 458 459 /* constraints */ 460 for (i=0;i<n_constraints;i++) { 461 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 462 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 463 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 464 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 465 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 466 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 467 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 468 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 469 if (n_vertices) { 470 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 471 } 472 /* Set values in coarse basis function and subdomain part of coarse_mat */ 473 /* coarse basis functions */ 474 j = i+n_vertices; /* don't touch this! */ 475 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 476 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 481 if (pcbddc->switch_static || pcbddc->dbg_flag) { 482 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 483 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 484 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 485 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 487 } 488 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 489 if (n_vertices) { 490 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 491 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 492 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 493 } 494 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 495 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 497 498 if (pcbddc->dbg_flag) { 499 /* assemble subdomain vector on nodes */ 500 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 501 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 502 if (n_R) { 503 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 504 } 505 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 506 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 507 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 508 /* assemble subdomain vector of lagrange multipliers */ 509 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 510 if (n_vertices) { 511 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 512 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 514 } 515 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 516 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 517 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 518 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 519 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 521 /* check saddle point solution */ 522 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 523 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 524 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 525 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 526 /* shift by the identity matrix */ 527 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 528 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 529 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 530 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 531 } 532 } 533 /* call assembling routines for local coarse basis */ 534 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 535 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 if (pcbddc->switch_static || pcbddc->dbg_flag) { 537 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 538 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 539 } 540 541 /* compute other basis functions for non-symmetric problems */ 542 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 543 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 544 PetscBool setsym; 545 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 546 if (!setsym) pcbddc->issym = PETSC_FALSE; 547 } 548 549 if (!pcbddc->issym) { 550 if (!pcbddc->coarse_psi_B) { 551 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 552 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 553 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 554 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 555 } 556 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 557 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 558 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 560 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 561 } 562 for (i=0;i<pcbddc->local_primal_size;i++) { 563 if (n_constraints) { 564 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 565 for (j=0;j<n_constraints;j++) { 566 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 567 } 568 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 569 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 570 } 571 if (i<n_vertices) { 572 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 573 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 574 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 575 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 576 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 577 if (n_constraints) { 578 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 579 } 580 } else { 581 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 582 } 583 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 584 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 585 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 586 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 587 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 588 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 589 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 590 if (i<n_vertices) { 591 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 592 } 593 if (pcbddc->switch_static || pcbddc->dbg_flag) { 594 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 595 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 596 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 597 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 598 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 599 } 600 601 if (pcbddc->dbg_flag) { 602 /* assemble subdomain vector on nodes */ 603 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 604 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 605 if (n_R) { 606 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 607 } 608 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 609 if (i<n_vertices) { 610 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 611 } 612 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 614 /* assemble subdomain vector of lagrange multipliers */ 615 for (j=0;j<pcbddc->local_primal_size;j++) { 616 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 617 } 618 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 619 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 620 /* check saddle point solution */ 621 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 622 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 623 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 624 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 625 /* shift by the identity matrix */ 626 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 627 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 628 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 629 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 630 } 631 } 632 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 633 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 634 if (pcbddc->switch_static || pcbddc->dbg_flag) { 635 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 637 } 638 unsymmetric_check = PETSC_TRUE; 639 } else { /* take references to already computed coarse basis */ 640 unsymmetric_check = PETSC_FALSE; 641 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 642 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 643 if (pcbddc->coarse_phi_D) { 644 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 645 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 646 } 647 } 648 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 649 /* Checking coarse_sub_mat and coarse basis functios */ 650 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 651 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 652 if (pcbddc->dbg_flag) { 653 Mat coarse_sub_mat; 654 Mat AUXMAT,TM1,TM2,TM3,TM4; 655 Mat coarse_phi_D,coarse_phi_B; 656 Mat coarse_psi_D,coarse_psi_B; 657 Mat A_II,A_BB,A_IB,A_BI; 658 MatType checkmattype=MATSEQAIJ; 659 PetscReal real_value; 660 661 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 662 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 663 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 664 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 665 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 666 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 667 if (unsymmetric_check) { 668 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 669 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 670 } 671 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 672 673 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 674 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 676 if (unsymmetric_check) { 677 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 684 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 685 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 686 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 687 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 688 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 689 } else { 690 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 691 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 692 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 693 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 694 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 695 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 696 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 697 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 698 } 699 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 700 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 701 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 702 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 703 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 704 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 705 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 710 for (i=0;i<pcbddc->local_primal_size;i++) { 711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 712 } 713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 714 for (i=0;i<pcbddc->local_primal_size;i++) { 715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 716 } 717 if (unsymmetric_check) { 718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 719 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 720 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 721 } 722 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 723 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 724 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 725 } 726 } 727 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 730 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 731 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 732 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 733 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 734 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 735 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 736 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 737 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 738 if (unsymmetric_check) { 739 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 740 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 741 } 742 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 743 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 744 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 745 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 746 } 747 /* free memory */ 748 if (n_vertices) { 749 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 750 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 751 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 752 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 753 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 754 } 755 if (n_constraints) { 756 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 757 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 758 ierr = MatDestroy(&M1);CHKERRQ(ierr); 759 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 760 } 761 ierr = PetscFree(auxindices);CHKERRQ(ierr); 762 /* get back data */ 763 *coarse_submat_vals_n = coarse_submat_vals; 764 PetscFunctionReturn(0); 765 } 766 767 #undef __FUNCT__ 768 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 769 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 770 { 771 PC_IS* pcis = (PC_IS*)(pc->data); 772 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 773 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 774 PetscBool issbaij,isseqaij; 775 /* manage repeated solves */ 776 MatReuse reuse; 777 PetscErrorCode ierr; 778 779 PetscFunctionBegin; 780 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 781 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 782 } 783 /* get mat flags */ 784 reuse = MAT_INITIAL_MATRIX; 785 if (pc->setupcalled) { 786 if (pc->flag == SAME_NONZERO_PATTERN) { 787 reuse = MAT_REUSE_MATRIX; 788 } else { 789 reuse = MAT_INITIAL_MATRIX; 790 } 791 } 792 if (reuse == MAT_INITIAL_MATRIX) { 793 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 794 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 795 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 796 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 797 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 798 } 799 800 /* transform local matrices if needed */ 801 if (pcbddc->ChangeOfBasisMatrix) { 802 Mat *change_mat_all; 803 IS is_local,is_global; 804 PetscBool sorted; 805 PetscInt *idxs_perm; 806 807 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr); 808 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 809 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 810 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 811 if (!sorted) { 812 const PetscInt *idxs; 813 PetscInt *idxs_sorted,i; 814 815 ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr); 816 ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr); 817 for (i=0;i<pcis->n;i++) { 818 idxs_perm[i] = i; 819 } 820 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 821 ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr); 822 for (i=0;i<pcis->n;i++) { 823 idxs_sorted[i] = idxs[idxs_perm[i]]; 824 } 825 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 826 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 827 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 828 } 829 830 /* get change of basis on the whole set of local dofs */ 831 ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 832 833 if (!sorted) { 834 Mat new_mat; 835 IS is_perm; 836 PetscInt *idxs,i; 837 838 ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr); 839 for (i=0;i<pcis->n;i++) { 840 idxs[idxs_perm[i]] = i; 841 } 842 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 843 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 844 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 845 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 846 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 847 change_mat_all[0] = new_mat; 848 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 849 } 850 851 /* check */ 852 if (pcbddc->dbg_flag) { 853 Vec x,x_change; 854 PetscReal error; 855 856 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 857 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 858 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 859 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 860 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 863 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 864 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 865 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 866 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 867 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 870 ierr = VecDestroy(&x);CHKERRQ(ierr); 871 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 872 } 873 874 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 875 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 876 if (isseqaij) { 877 ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 878 } else { 879 Mat work_mat; 880 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 881 ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 882 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 883 } 884 /* 885 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 886 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 887 */ 888 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 889 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 890 } else { 891 /* without change of basis, the local matrix is unchanged */ 892 if (!pcbddc->local_mat) { 893 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 894 pcbddc->local_mat = matis->A; 895 } 896 } 897 898 /* get submatrices */ 899 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 900 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 901 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 902 if (!issbaij) { 903 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 904 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 905 } else { 906 Mat newmat; 907 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 908 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 909 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 910 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 917 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 918 { 919 PC_IS* pcis = (PC_IS*)(pc->data); 920 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 921 IS is_aux1,is_aux2; 922 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 923 PetscInt n_vertices,i,j,n_R,n_D,n_B; 924 PetscInt vbs,bs; 925 PetscBT bitmask; 926 PetscErrorCode ierr; 927 928 PetscFunctionBegin; 929 /* 930 No need to setup local scatters if 931 - primal space is unchanged 932 AND 933 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 934 AND 935 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 936 */ 937 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 938 PetscFunctionReturn(0); 939 } 940 /* destroy old objects */ 941 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 942 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 943 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 944 /* Set Non-overlapping dimensions */ 945 n_B = pcis->n_B; n_D = pcis->n - n_B; 946 n_vertices = pcbddc->n_actual_vertices; 947 /* create auxiliary bitmask */ 948 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 949 for (i=0;i<n_vertices;i++) { 950 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 951 } 952 953 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 954 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 955 for (i=0, n_R=0; i<pcis->n; i++) { 956 if (!PetscBTLookup(bitmask,i)) { 957 idx_R_local[n_R] = i; 958 n_R++; 959 } 960 } 961 962 /* Block code */ 963 vbs = 1; 964 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 965 if (bs>1 && !(n_vertices%bs)) { 966 PetscBool is_blocked = PETSC_TRUE; 967 PetscInt *vary; 968 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 969 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 970 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 971 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 972 for (i=0; i<n_vertices; i++) { 973 if (vary[i]!=0 && vary[i]!=bs) { 974 is_blocked = PETSC_FALSE; 975 break; 976 } 977 } 978 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 979 vbs = bs; 980 for (i=0;i<n_R/vbs;i++) { 981 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 982 } 983 } 984 ierr = PetscFree(vary);CHKERRQ(ierr); 985 } 986 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 987 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 988 989 /* print some info if requested */ 990 if (pcbddc->dbg_flag) { 991 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 993 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 999 } 1000 1001 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1002 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1003 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1004 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1005 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1006 for (i=0; i<n_D; i++) { 1007 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1008 } 1009 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1010 for (i=0, j=0; i<n_R; i++) { 1011 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1012 aux_array1[j++] = i; 1013 } 1014 } 1015 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1016 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1017 for (i=0, j=0; i<n_B; i++) { 1018 if (!PetscBTLookup(bitmask,is_indices[i])) { 1019 aux_array2[j++] = i; 1020 } 1021 } 1022 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1023 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1024 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1025 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1026 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1027 1028 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1029 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1030 for (i=0, j=0; i<n_R; i++) { 1031 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1032 aux_array1[j++] = i; 1033 } 1034 } 1035 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1036 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1037 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1038 } 1039 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1040 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1047 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1048 { 1049 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1050 PC_IS *pcis = (PC_IS*)pc->data; 1051 PC pc_temp; 1052 Mat A_RR; 1053 MatReuse reuse; 1054 PetscScalar m_one = -1.0; 1055 PetscReal value; 1056 PetscInt n_D,n_R,ibs,mbs; 1057 PetscBool use_exact,use_exact_reduced,issbaij; 1058 PetscErrorCode ierr; 1059 /* prefixes stuff */ 1060 char dir_prefix[256],neu_prefix[256],str_level[16]; 1061 size_t len; 1062 1063 PetscFunctionBegin; 1064 1065 /* compute prefixes */ 1066 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1067 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1068 if (!pcbddc->current_level) { 1069 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1070 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1071 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1072 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1073 } else { 1074 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1075 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1076 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1077 len -= 15; /* remove "pc_bddc_coarse_" */ 1078 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1079 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1080 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1081 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1082 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1083 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1084 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1085 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1086 } 1087 1088 /* DIRICHLET PROBLEM */ 1089 /* Matrix for Dirichlet problem is pcis->A_II */ 1090 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1091 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1092 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1093 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1094 /* default */ 1095 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1096 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1097 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1098 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1099 if (issbaij) { 1100 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1101 } else { 1102 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1103 } 1104 /* Allow user's customization */ 1105 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1106 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1107 } 1108 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1109 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1110 if (!n_D) { 1111 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1112 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1113 } 1114 /* Set Up KSP for Dirichlet problem of BDDC */ 1115 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1116 /* set ksp_D into pcis data */ 1117 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1118 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1119 pcis->ksp_D = pcbddc->ksp_D; 1120 1121 /* NEUMANN PROBLEM */ 1122 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1123 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1124 if (pcbddc->ksp_R) { /* already created ksp */ 1125 PetscInt nn_R; 1126 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1127 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1128 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1129 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1130 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1131 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1132 reuse = MAT_INITIAL_MATRIX; 1133 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1134 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1135 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1136 reuse = MAT_INITIAL_MATRIX; 1137 } else { /* safe to reuse the matrix */ 1138 reuse = MAT_REUSE_MATRIX; 1139 } 1140 } 1141 /* last check */ 1142 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1143 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1144 reuse = MAT_INITIAL_MATRIX; 1145 } 1146 } else { /* first time, so we need to create the matrix */ 1147 reuse = MAT_INITIAL_MATRIX; 1148 } 1149 /* extract A_RR */ 1150 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1151 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1152 if (ibs != mbs) { 1153 Mat newmat; 1154 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1155 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1156 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1157 } else { 1158 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1159 } 1160 if (!pcbddc->ksp_R) { /* create object if not present */ 1161 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1162 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1163 /* default */ 1164 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1165 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1166 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1167 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1168 if (issbaij) { 1169 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1170 } else { 1171 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1172 } 1173 /* Allow user's customization */ 1174 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1175 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1176 } 1177 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1178 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1179 if (!n_R) { 1180 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1181 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1182 } 1183 /* Set Up KSP for Neumann problem of BDDC */ 1184 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1185 1186 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1187 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1188 /* Dirichlet */ 1189 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1190 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1191 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1192 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1193 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1194 /* need to be adapted? */ 1195 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1196 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1197 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1198 /* print info */ 1199 if (pcbddc->dbg_flag) { 1200 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1201 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1202 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1203 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1204 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1205 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1206 } 1207 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1208 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1209 } 1210 1211 /* Neumann */ 1212 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1213 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1214 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1215 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1216 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1217 /* need to be adapted? */ 1218 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1219 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1220 /* print info */ 1221 if (pcbddc->dbg_flag) { 1222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1223 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1224 } 1225 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1226 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1227 } 1228 } 1229 /* free Neumann problem's matrix */ 1230 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1236 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1237 { 1238 PetscErrorCode ierr; 1239 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1240 1241 PetscFunctionBegin; 1242 if (applytranspose) { 1243 if (pcbddc->local_auxmat1) { 1244 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1245 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1246 } 1247 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1248 } else { 1249 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1250 if (pcbddc->local_auxmat1) { 1251 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1252 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1253 } 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1261 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1262 { 1263 PetscErrorCode ierr; 1264 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1265 PC_IS* pcis = (PC_IS*) (pc->data); 1266 const PetscScalar zero = 0.0; 1267 1268 PetscFunctionBegin; 1269 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1270 if (applytranspose) { 1271 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1272 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1273 } else { 1274 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1275 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1276 } 1277 /* start communications from local primal nodes to rhs of coarse solver */ 1278 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1279 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1280 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1281 1282 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1283 /* TODO remove null space when doing multilevel */ 1284 if (pcbddc->coarse_ksp) { 1285 if (applytranspose) { 1286 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1287 } else { 1288 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1289 } 1290 } 1291 1292 /* Local solution on R nodes */ 1293 if (pcis->n) { 1294 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1295 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1296 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1297 if (pcbddc->switch_static) { 1298 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1299 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1300 } 1301 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1302 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1303 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1304 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1305 if (pcbddc->switch_static) { 1306 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1307 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1308 } 1309 } 1310 1311 /* communications from coarse sol to local primal nodes */ 1312 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1313 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1314 1315 /* Sum contributions from two levels */ 1316 if (applytranspose) { 1317 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1318 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1319 } else { 1320 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1321 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1329 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1330 { 1331 PetscErrorCode ierr; 1332 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1333 PetscScalar *array,*array2; 1334 Vec from,to; 1335 1336 PetscFunctionBegin; 1337 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1338 from = pcbddc->coarse_vec; 1339 to = pcbddc->vec1_P; 1340 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1341 Vec tvec; 1342 PetscInt lsize; 1343 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1344 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1345 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1346 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1347 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1348 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1349 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1350 } 1351 } else { /* from local to global -> put data in coarse right hand side */ 1352 from = pcbddc->vec1_P; 1353 to = pcbddc->coarse_vec; 1354 } 1355 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1361 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1362 { 1363 PetscErrorCode ierr; 1364 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1365 PetscScalar *array,*array2; 1366 Vec from,to; 1367 1368 PetscFunctionBegin; 1369 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1370 from = pcbddc->coarse_vec; 1371 to = pcbddc->vec1_P; 1372 } else { /* from local to global -> put data in coarse right hand side */ 1373 from = pcbddc->vec1_P; 1374 to = pcbddc->coarse_vec; 1375 } 1376 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1377 if (smode == SCATTER_FORWARD) { 1378 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1379 Vec tvec; 1380 PetscInt lsize; 1381 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1382 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1383 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1384 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1385 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1386 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1387 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1388 } 1389 } 1390 PetscFunctionReturn(0); 1391 } 1392 1393 /* uncomment for testing purposes */ 1394 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1397 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1398 { 1399 PetscErrorCode ierr; 1400 PC_IS* pcis = (PC_IS*)(pc->data); 1401 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1402 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1403 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1404 MatType impMatType=MATSEQAIJ; 1405 /* one and zero */ 1406 PetscScalar one=1.0,zero=0.0; 1407 /* space to store constraints and their local indices */ 1408 PetscScalar *temp_quadrature_constraint; 1409 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1410 /* iterators */ 1411 PetscInt i,j,k,total_counts,temp_start_ptr; 1412 /* stuff to store connected components stored in pcbddc->mat_graph */ 1413 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1414 PetscInt n_ISForFaces,n_ISForEdges; 1415 /* near null space stuff */ 1416 MatNullSpace nearnullsp; 1417 const Vec *nearnullvecs; 1418 Vec *localnearnullsp; 1419 PetscBool nnsp_has_cnst; 1420 PetscInt nnsp_size; 1421 PetscScalar *array; 1422 /* BLAS integers */ 1423 PetscBLASInt lwork,lierr; 1424 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1425 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1426 /* LAPACK working arrays for SVD or POD */ 1427 PetscBool skip_lapack; 1428 PetscScalar *work; 1429 PetscReal *singular_vals; 1430 #if defined(PETSC_USE_COMPLEX) 1431 PetscReal *rwork; 1432 #endif 1433 #if defined(PETSC_MISSING_LAPACK_GESVD) 1434 PetscBLASInt Blas_one_2=1; 1435 PetscScalar *temp_basis,*correlation_mat; 1436 #else 1437 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1438 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1439 #endif 1440 /* reuse */ 1441 PetscInt olocal_primal_size; 1442 PetscInt *oprimal_indices_local_idxs; 1443 /* change of basis */ 1444 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1445 PetscBool boolforchange,qr_needed; 1446 PetscBT touched,change_basis,qr_needed_idx; 1447 /* auxiliary stuff */ 1448 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1449 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1450 PetscScalar *temp_quadrature_constraint_work; 1451 /* some quantities */ 1452 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1453 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1454 1455 1456 PetscFunctionBegin; 1457 /* Destroy Mat objects computed previously */ 1458 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1459 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1460 /* Get index sets for faces, edges and vertices from graph */ 1461 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1462 /* free unneeded index sets */ 1463 if (!pcbddc->use_vertices) { 1464 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1465 } 1466 if (!pcbddc->use_edges) { 1467 for (i=0;i<n_ISForEdges;i++) { 1468 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1469 } 1470 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1471 n_ISForEdges = 0; 1472 } 1473 if (!pcbddc->use_faces) { 1474 for (i=0;i<n_ISForFaces;i++) { 1475 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1476 } 1477 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1478 n_ISForFaces = 0; 1479 } 1480 /* HACKS (the following two blocks of code) */ 1481 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1482 pcbddc->use_change_of_basis = PETSC_TRUE; 1483 if (!ISForEdges) { 1484 pcbddc->use_change_on_faces = PETSC_TRUE; 1485 } 1486 } 1487 if (pcbddc->NullSpace) { 1488 /* use_change_of_basis should be consistent among processors */ 1489 PetscBool tbool[2],gbool[2]; 1490 tbool [0] = pcbddc->use_change_of_basis; 1491 tbool [1] = pcbddc->use_change_on_faces; 1492 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1493 pcbddc->use_change_of_basis = gbool[0]; 1494 pcbddc->use_change_on_faces = gbool[1]; 1495 } 1496 /* print some info */ 1497 if (pcbddc->dbg_flag) { 1498 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1499 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1500 i = 0; 1501 if (ISForVertices) { 1502 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1505 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1506 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1507 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1508 } 1509 /* check if near null space is attached to global mat */ 1510 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1511 if (nearnullsp) { 1512 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1513 /* remove any stored info */ 1514 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1515 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1516 /* store information for BDDC solver reuse */ 1517 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1518 pcbddc->onearnullspace = nearnullsp; 1519 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1520 for (i=0;i<nnsp_size;i++) { 1521 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1522 } 1523 } else { /* if near null space is not provided BDDC uses constants by default */ 1524 nnsp_size = 0; 1525 nnsp_has_cnst = PETSC_TRUE; 1526 } 1527 /* get max number of constraints on a single cc */ 1528 max_constraints = nnsp_size; 1529 if (nnsp_has_cnst) max_constraints++; 1530 1531 /* 1532 Evaluate maximum storage size needed by the procedure 1533 - temp_indices will contain start index of each constraint stored as follows 1534 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1535 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1536 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1537 */ 1538 total_counts = n_ISForFaces+n_ISForEdges; 1539 total_counts *= max_constraints; 1540 n_vertices = 0; 1541 if (ISForVertices) { 1542 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1543 } 1544 total_counts += n_vertices; 1545 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1546 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1547 total_counts = 0; 1548 max_size_of_constraint = 0; 1549 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1550 if (i<n_ISForEdges) { 1551 used_IS = &ISForEdges[i]; 1552 } else { 1553 used_IS = &ISForFaces[i-n_ISForEdges]; 1554 } 1555 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1556 total_counts += j; 1557 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1558 } 1559 total_counts *= max_constraints; 1560 total_counts += n_vertices; 1561 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1562 /* get local part of global near null space vectors */ 1563 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1564 for (k=0;k<nnsp_size;k++) { 1565 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1566 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 } 1569 1570 /* whether or not to skip lapack calls */ 1571 skip_lapack = PETSC_TRUE; 1572 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1573 1574 /* allocate some auxiliary stuff */ 1575 if (!skip_lapack || pcbddc->use_qr_single) { 1576 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1577 } 1578 1579 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1580 if (!skip_lapack) { 1581 PetscScalar temp_work; 1582 1583 #if defined(PETSC_MISSING_LAPACK_GESVD) 1584 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1585 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1586 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1587 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1588 #if defined(PETSC_USE_COMPLEX) 1589 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1590 #endif 1591 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1592 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1593 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1594 lwork = -1; 1595 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1596 #if !defined(PETSC_USE_COMPLEX) 1597 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1598 #else 1599 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1600 #endif 1601 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1602 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1603 #else /* on missing GESVD */ 1604 /* SVD */ 1605 PetscInt max_n,min_n; 1606 max_n = max_size_of_constraint; 1607 min_n = max_constraints; 1608 if (max_size_of_constraint < max_constraints) { 1609 min_n = max_size_of_constraint; 1610 max_n = max_constraints; 1611 } 1612 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1613 #if defined(PETSC_USE_COMPLEX) 1614 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1615 #endif 1616 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1617 lwork = -1; 1618 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1619 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1620 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1621 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1622 #if !defined(PETSC_USE_COMPLEX) 1623 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1624 #else 1625 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1626 #endif 1627 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1628 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1629 #endif /* on missing GESVD */ 1630 /* Allocate optimal workspace */ 1631 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1632 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1633 } 1634 /* Now we can loop on constraining sets */ 1635 total_counts = 0; 1636 temp_indices[0] = 0; 1637 /* vertices */ 1638 if (ISForVertices) { 1639 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1640 if (nnsp_has_cnst) { /* consider all vertices */ 1641 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1642 for (i=0;i<n_vertices;i++) { 1643 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1644 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1645 total_counts++; 1646 } 1647 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1648 PetscBool used_vertex; 1649 for (i=0;i<n_vertices;i++) { 1650 used_vertex = PETSC_FALSE; 1651 k = 0; 1652 while (!used_vertex && k<nnsp_size) { 1653 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1654 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1655 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1656 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1657 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1658 total_counts++; 1659 used_vertex = PETSC_TRUE; 1660 } 1661 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1662 k++; 1663 } 1664 } 1665 } 1666 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1667 n_vertices = total_counts; 1668 } 1669 1670 /* edges and faces */ 1671 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1672 if (ncc<n_ISForEdges) { 1673 used_IS = &ISForEdges[ncc]; 1674 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1675 } else { 1676 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1677 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1678 } 1679 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1680 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1681 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1682 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1683 /* change of basis should not be performed on local periodic nodes */ 1684 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1685 if (nnsp_has_cnst) { 1686 PetscScalar quad_value; 1687 temp_constraints++; 1688 if (!pcbddc->use_nnsp_true) { 1689 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1690 } else { 1691 quad_value = 1.0; 1692 } 1693 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1694 for (j=0;j<size_of_constraint;j++) { 1695 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1696 } 1697 /* sort by global ordering if using lapack subroutines */ 1698 if (!skip_lapack || pcbddc->use_qr_single) { 1699 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1700 for (j=0;j<size_of_constraint;j++) { 1701 permutation[j]=j; 1702 } 1703 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1704 for (j=0;j<size_of_constraint;j++) { 1705 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1706 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1707 } 1708 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1709 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1710 } 1711 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1712 total_counts++; 1713 } 1714 for (k=0;k<nnsp_size;k++) { 1715 PetscReal real_value; 1716 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1717 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1718 for (j=0;j<size_of_constraint;j++) { 1719 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1720 } 1721 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1722 /* check if array is null on the connected component */ 1723 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1724 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1725 if (real_value > 0.0) { /* keep indices and values */ 1726 /* sort by global ordering if using lapack subroutines */ 1727 if (!skip_lapack || pcbddc->use_qr_single) { 1728 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1729 for (j=0;j<size_of_constraint;j++) { 1730 permutation[j]=j; 1731 } 1732 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1733 for (j=0;j<size_of_constraint;j++) { 1734 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1735 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1736 } 1737 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1738 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1739 } 1740 temp_constraints++; 1741 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1742 total_counts++; 1743 } 1744 } 1745 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1746 valid_constraints = temp_constraints; 1747 if (!pcbddc->use_nnsp_true && temp_constraints) { 1748 if (temp_constraints == 1) { /* just normalize the constraint */ 1749 PetscScalar norm; 1750 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1751 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1752 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1753 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1754 } else { /* perform SVD */ 1755 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1756 1757 #if defined(PETSC_MISSING_LAPACK_GESVD) 1758 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1759 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1760 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1761 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1762 from that computed using LAPACKgesvd 1763 -> This is due to a different computation of eigenvectors in LAPACKheev 1764 -> The quality of the POD-computed basis will be the same */ 1765 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1766 /* Store upper triangular part of correlation matrix */ 1767 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1768 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1769 for (j=0;j<temp_constraints;j++) { 1770 for (k=0;k<j+1;k++) { 1771 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1772 } 1773 } 1774 /* compute eigenvalues and eigenvectors of correlation matrix */ 1775 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1776 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1777 #if !defined(PETSC_USE_COMPLEX) 1778 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1779 #else 1780 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1781 #endif 1782 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1783 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1784 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1785 j = 0; 1786 while (j < temp_constraints && singular_vals[j] < tol) j++; 1787 total_counts = total_counts-j; 1788 valid_constraints = temp_constraints-j; 1789 /* scale and copy POD basis into used quadrature memory */ 1790 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1791 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1792 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1793 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1794 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1795 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1796 if (j<temp_constraints) { 1797 PetscInt ii; 1798 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1799 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1800 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1801 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1802 for (k=0;k<temp_constraints-j;k++) { 1803 for (ii=0;ii<size_of_constraint;ii++) { 1804 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1805 } 1806 } 1807 } 1808 #else /* on missing GESVD */ 1809 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1810 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1811 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1812 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1813 #if !defined(PETSC_USE_COMPLEX) 1814 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1815 #else 1816 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1817 #endif 1818 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1819 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1820 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1821 k = temp_constraints; 1822 if (k > size_of_constraint) k = size_of_constraint; 1823 j = 0; 1824 while (j < k && singular_vals[k-j-1] < tol) j++; 1825 valid_constraints = k-j; 1826 total_counts = total_counts-temp_constraints+valid_constraints; 1827 #endif /* on missing GESVD */ 1828 } 1829 } 1830 /* setting change_of_basis flag is safe now */ 1831 if (boolforchange) { 1832 for (j=0;j<valid_constraints;j++) { 1833 PetscBTSet(change_basis,total_counts-j-1); 1834 } 1835 } 1836 } 1837 /* free index sets of faces, edges and vertices */ 1838 for (i=0;i<n_ISForFaces;i++) { 1839 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1840 } 1841 if (n_ISForFaces) { 1842 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1843 } 1844 for (i=0;i<n_ISForEdges;i++) { 1845 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1846 } 1847 if (n_ISForEdges) { 1848 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1849 } 1850 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1851 /* map temp_indices_to_constraint in boundary numbering */ 1852 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1853 if (i != temp_indices[total_counts]) { 1854 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1855 } 1856 1857 /* free workspace */ 1858 if (!skip_lapack || pcbddc->use_qr_single) { 1859 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 1860 } 1861 if (!skip_lapack) { 1862 ierr = PetscFree(work);CHKERRQ(ierr); 1863 #if defined(PETSC_USE_COMPLEX) 1864 ierr = PetscFree(rwork);CHKERRQ(ierr); 1865 #endif 1866 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1867 #if defined(PETSC_MISSING_LAPACK_GESVD) 1868 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1869 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1870 #endif 1871 } 1872 for (k=0;k<nnsp_size;k++) { 1873 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1874 } 1875 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1876 1877 /* set quantities in pcbddc data structure and store previous primal size */ 1878 /* n_vertices defines the number of subdomain corners in the primal space */ 1879 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1880 olocal_primal_size = pcbddc->local_primal_size; 1881 pcbddc->local_primal_size = total_counts; 1882 pcbddc->n_vertices = n_vertices; 1883 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1884 1885 /* Create constraint matrix */ 1886 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1887 /* so we need to set it up properly either with or without change of basis */ 1888 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1889 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1890 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1891 /* array to compute a local numbering of constraints : vertices first then constraints */ 1892 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1893 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1894 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1895 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1896 /* auxiliary stuff for basis change */ 1897 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1898 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1899 1900 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1901 total_primal_vertices=0; 1902 for (i=0;i<pcbddc->local_primal_size;i++) { 1903 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1904 if (size_of_constraint == 1) { 1905 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1906 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1907 aux_primal_minloc[total_primal_vertices]=0; 1908 total_primal_vertices++; 1909 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1910 PetscInt min_loc,min_index; 1911 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1912 /* find first untouched local node */ 1913 k = 0; 1914 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1915 min_index = global_indices[k]; 1916 min_loc = k; 1917 /* search the minimum among global nodes already untouched on the cc */ 1918 for (k=1;k<size_of_constraint;k++) { 1919 /* there can be more than one constraint on a single connected component */ 1920 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1921 min_index = global_indices[k]; 1922 min_loc = k; 1923 } 1924 } 1925 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1926 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1927 aux_primal_minloc[total_primal_vertices]=min_loc; 1928 total_primal_vertices++; 1929 } 1930 } 1931 /* determine if a QR strategy is needed for change of basis */ 1932 qr_needed = PETSC_FALSE; 1933 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1934 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1935 if (PetscBTLookup(change_basis,i)) { 1936 if (!pcbddc->use_qr_single) { 1937 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1938 j = 0; 1939 for (k=0;k<size_of_constraint;k++) { 1940 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1941 j++; 1942 } 1943 } 1944 /* found more than one primal dof on the cc */ 1945 if (j > 1) { 1946 PetscBTSet(qr_needed_idx,i); 1947 qr_needed = PETSC_TRUE; 1948 } 1949 } else { 1950 PetscBTSet(qr_needed_idx,i); 1951 qr_needed = PETSC_TRUE; 1952 } 1953 } 1954 } 1955 /* free workspace */ 1956 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1957 1958 /* permute indices in order to have a sorted set of vertices */ 1959 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1960 1961 /* nonzero structure of constraint matrix */ 1962 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1963 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1964 j=total_primal_vertices; 1965 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1966 if (!PetscBTLookup(change_basis,i)) { 1967 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1968 j++; 1969 } 1970 } 1971 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1972 ierr = PetscFree(nnz);CHKERRQ(ierr); 1973 /* set values in constraint matrix */ 1974 for (i=0;i<total_primal_vertices;i++) { 1975 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1976 } 1977 total_counts = total_primal_vertices; 1978 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1979 if (!PetscBTLookup(change_basis,i)) { 1980 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1981 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1982 total_counts++; 1983 } 1984 } 1985 /* assembling */ 1986 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1987 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 /* 1989 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1990 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1991 */ 1992 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1993 if (pcbddc->use_change_of_basis) { 1994 /* dual and primal dofs on a single cc */ 1995 PetscInt dual_dofs,primal_dofs; 1996 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1997 PetscInt primal_counter; 1998 /* working stuff for GEQRF */ 1999 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2000 PetscBLASInt lqr_work; 2001 /* working stuff for UNGQR */ 2002 PetscScalar *gqr_work,lgqr_work_t; 2003 PetscBLASInt lgqr_work; 2004 /* working stuff for TRTRS */ 2005 PetscScalar *trs_rhs; 2006 PetscBLASInt Blas_NRHS; 2007 /* pointers for values insertion into change of basis matrix */ 2008 PetscInt *start_rows,*start_cols; 2009 PetscScalar *start_vals; 2010 /* working stuff for values insertion */ 2011 PetscBT is_primal; 2012 /* matrix sizes */ 2013 PetscInt global_size,local_size; 2014 /* work array for nonzeros */ 2015 PetscScalar *nnz_array; 2016 /* temporary change of basis */ 2017 Mat localChangeOfBasisMatrix; 2018 /* auxiliary work for global change of basis */ 2019 Vec nnz_vec; 2020 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 2021 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 2022 PetscScalar *vals; 2023 PetscBool done; 2024 2025 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2026 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2027 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2028 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2029 2030 /* nonzeros for local mat */ 2031 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 2032 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2033 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2034 if (PetscBTLookup(change_basis,i)) { 2035 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2036 if (PetscBTLookup(qr_needed_idx,i)) { 2037 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2038 } else { 2039 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 2040 /* get local primal index on the cc */ 2041 j = 0; 2042 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2043 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2044 } 2045 } 2046 } 2047 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2048 /* Set initial identity in the matrix */ 2049 for (i=0;i<pcis->n_B;i++) { 2050 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2051 } 2052 2053 if (pcbddc->dbg_flag) { 2054 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2056 } 2057 2058 2059 /* Now we loop on the constraints which need a change of basis */ 2060 /* 2061 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2062 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2063 2064 Basic blocks of change of basis matrix T computed by 2065 2066 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2067 2068 | 1 0 ... 0 s_1/S | 2069 | 0 1 ... 0 s_2/S | 2070 | ... | 2071 | 0 ... 1 s_{n-1}/S | 2072 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2073 2074 with S = \sum_{i=1}^n s_i^2 2075 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2076 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2077 2078 - QR decomposition of constraints otherwise 2079 */ 2080 if (qr_needed) { 2081 /* space to store Q */ 2082 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2083 /* first we issue queries for optimal work */ 2084 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2085 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2087 lqr_work = -1; 2088 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2089 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2090 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2091 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2092 lgqr_work = -1; 2093 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2094 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2096 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2097 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2098 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2099 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2100 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2101 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2102 /* array to store scaling factors for reflectors */ 2103 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2104 /* array to store rhs and solution of triangular solver */ 2105 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2106 /* allocating workspace for check */ 2107 if (pcbddc->dbg_flag) { 2108 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2109 } 2110 } 2111 /* array to store whether a node is primal or not */ 2112 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2113 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2114 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2115 if (i != total_primal_vertices) { 2116 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2117 } 2118 for (i=0;i<total_primal_vertices;i++) { 2119 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2120 } 2121 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2122 2123 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2124 /* -> using implicit ordering contained in temp_indices data */ 2125 total_counts = pcbddc->n_vertices; 2126 primal_counter = total_counts; 2127 while (total_counts<pcbddc->local_primal_size) { 2128 primal_dofs = 1; 2129 if (PetscBTLookup(change_basis,total_counts)) { 2130 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2131 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2132 primal_dofs++; 2133 } 2134 /* get constraint info */ 2135 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2136 dual_dofs = size_of_constraint-primal_dofs; 2137 2138 if (pcbddc->dbg_flag) { 2139 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2140 } 2141 2142 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2143 2144 /* copy quadrature constraints for change of basis check */ 2145 if (pcbddc->dbg_flag) { 2146 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2147 } 2148 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2149 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2150 2151 /* compute QR decomposition of constraints */ 2152 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2153 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2154 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2155 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2156 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2157 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2158 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2159 2160 /* explictly compute R^-T */ 2161 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2162 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2163 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2164 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2165 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2166 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2167 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2168 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2169 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2170 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2171 2172 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2173 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2174 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2175 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2176 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2177 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2178 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2179 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2180 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2181 2182 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2183 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2184 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2185 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2186 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2187 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2188 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2189 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2190 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2191 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2192 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2193 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2194 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2195 2196 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2197 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2198 /* insert cols for primal dofs */ 2199 for (j=0;j<primal_dofs;j++) { 2200 start_vals = &qr_basis[j*size_of_constraint]; 2201 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2202 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2203 } 2204 /* insert cols for dual dofs */ 2205 for (j=0,k=0;j<dual_dofs;k++) { 2206 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2207 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2208 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2209 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2210 j++; 2211 } 2212 } 2213 2214 /* check change of basis */ 2215 if (pcbddc->dbg_flag) { 2216 PetscInt ii,jj; 2217 PetscBool valid_qr=PETSC_TRUE; 2218 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2219 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2220 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2221 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2222 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2223 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2224 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2225 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2226 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2227 for (jj=0;jj<size_of_constraint;jj++) { 2228 for (ii=0;ii<primal_dofs;ii++) { 2229 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2230 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2231 } 2232 } 2233 if (!valid_qr) { 2234 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2235 for (jj=0;jj<size_of_constraint;jj++) { 2236 for (ii=0;ii<primal_dofs;ii++) { 2237 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2238 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2239 } 2240 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2241 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2242 } 2243 } 2244 } 2245 } else { 2246 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2247 } 2248 } 2249 } else { /* simple transformation block */ 2250 PetscInt row,col; 2251 PetscScalar val,norm; 2252 2253 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2254 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2255 for (j=0;j<size_of_constraint;j++) { 2256 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2257 if (!PetscBTLookup(is_primal,row)) { 2258 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2259 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2260 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2261 } else { 2262 for (k=0;k<size_of_constraint;k++) { 2263 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2264 if (row != col) { 2265 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2266 } else { 2267 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2268 } 2269 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2270 } 2271 } 2272 } 2273 if (pcbddc->dbg_flag) { 2274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2275 } 2276 } 2277 /* increment primal counter */ 2278 primal_counter += primal_dofs; 2279 } else { 2280 if (pcbddc->dbg_flag) { 2281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2282 } 2283 } 2284 /* increment constraint counter total_counts */ 2285 total_counts += primal_dofs; 2286 } 2287 2288 /* free workspace */ 2289 if (qr_needed) { 2290 if (pcbddc->dbg_flag) { 2291 ierr = PetscFree(work);CHKERRQ(ierr); 2292 } 2293 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2294 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2295 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2296 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2297 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2298 } 2299 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2300 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2301 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2302 2303 /* assembling of global change of variable */ 2304 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2305 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2306 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2307 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2308 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2309 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2310 2311 /* nonzeros (overestimated) */ 2312 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2313 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2314 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2315 for (i=0;i<pcis->n;i++) { 2316 nnz_array[i] = 1.0; 2317 idxs_all[i] = i; 2318 } 2319 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2320 for (i=0;i<pcis->n_B;i++) { 2321 nnz_array[idxs_B[i]] = nnz[i]; 2322 } 2323 if (pcis->n) { 2324 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2325 } 2326 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2327 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2328 ierr = PetscFree(nnz);CHKERRQ(ierr); 2329 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2330 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2331 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2332 for (i=0;i<local_size;i++) { 2333 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2334 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2335 } 2336 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2337 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2339 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2340 2341 /* Set identity on dirichlet dofs */ 2342 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2343 for (i=0;i<pcis->n-pcis->n_B;i++) { 2344 PetscScalar one=1.0; 2345 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2346 } 2347 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2348 2349 /* Set values at interface dofs */ 2350 done = PETSC_TRUE; 2351 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2352 if (!done) { 2353 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2354 } 2355 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2356 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2357 ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2358 for (i=0;i<nvtxs;i++) { 2359 PetscInt row,*cols,ncols; 2360 PetscScalar *mat_vals; 2361 2362 row = idxs_B[i]; 2363 ncols = xadj[i+1]-xadj[i]; 2364 cols = idxs_mapped+xadj[i]; 2365 mat_vals = vals+xadj[i]; 2366 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2367 } 2368 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2369 if (!done) { 2370 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2371 } 2372 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2373 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2374 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2375 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2376 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2377 2378 /* check */ 2379 if (pcbddc->dbg_flag) { 2380 PetscReal error; 2381 Vec x,x_change; 2382 2383 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2384 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2385 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2386 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2387 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2388 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2389 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2390 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2391 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2392 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2393 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2394 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2395 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2397 ierr = VecDestroy(&x);CHKERRQ(ierr); 2398 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2399 } 2400 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2401 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2402 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2403 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2404 } 2405 2406 /* set up change of basis context */ 2407 if (pcbddc->ChangeOfBasisMatrix) { 2408 PCBDDCChange_ctx change_ctx; 2409 2410 if (!pcbddc->new_global_mat) { 2411 PetscInt global_size,local_size; 2412 2413 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2414 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2415 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2416 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2417 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2418 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2419 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2420 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2421 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2422 } else { 2423 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2424 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2425 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2426 } 2427 if (!pcbddc->user_ChangeOfBasisMatrix) { 2428 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2429 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2430 } else { 2431 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2432 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2433 } 2434 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2435 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2436 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2437 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2438 } 2439 2440 /* get indices in local ordering for vertices and constraints */ 2441 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2442 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2443 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2444 } 2445 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2446 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2447 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2448 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2449 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2450 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2451 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2452 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2453 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2454 /* set quantities in PCBDDC data struct */ 2455 pcbddc->n_actual_vertices = i; 2456 /* check if a new primal space has been introduced */ 2457 pcbddc->new_primal_space_local = PETSC_TRUE; 2458 if (olocal_primal_size == pcbddc->local_primal_size) { 2459 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2460 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2461 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2462 } 2463 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2464 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2465 2466 /* flush dbg viewer */ 2467 if (pcbddc->dbg_flag) { 2468 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2469 } 2470 2471 /* free workspace */ 2472 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2473 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2474 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2475 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2476 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2477 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2483 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2484 { 2485 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2486 PC_IS *pcis = (PC_IS*)pc->data; 2487 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2488 PetscInt ierr,i,vertex_size; 2489 PetscViewer viewer=pcbddc->dbg_viewer; 2490 2491 PetscFunctionBegin; 2492 /* Reset previously computed graph */ 2493 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2494 /* Init local Graph struct */ 2495 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2496 2497 /* Check validity of the csr graph passed in by the user */ 2498 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2499 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2500 } 2501 2502 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2503 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2504 Mat mat_adj; 2505 const PetscInt *xadj,*adjncy; 2506 PetscBool flg_row=PETSC_TRUE; 2507 2508 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2509 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2510 if (!flg_row) { 2511 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2512 } 2513 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2514 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2515 if (!flg_row) { 2516 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2517 } 2518 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2519 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2520 } 2521 2522 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2523 vertex_size = 1; 2524 if (pcbddc->user_provided_isfordofs) { 2525 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2526 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2527 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2528 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2529 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2530 } 2531 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2532 pcbddc->n_ISForDofs = 0; 2533 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2534 } 2535 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2536 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2537 } else { 2538 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2539 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2540 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2541 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2542 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2543 } 2544 } 2545 } 2546 2547 /* Setup of Graph */ 2548 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2549 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2550 } 2551 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2552 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2553 } 2554 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2555 2556 /* Graph's connected components analysis */ 2557 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2558 2559 /* print some info to stdout */ 2560 if (pcbddc->dbg_flag) { 2561 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2562 } 2563 2564 /* mark topography has done */ 2565 pcbddc->recompute_topography = PETSC_FALSE; 2566 PetscFunctionReturn(0); 2567 } 2568 2569 #undef __FUNCT__ 2570 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2571 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2572 { 2573 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2574 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2575 PetscErrorCode ierr; 2576 2577 PetscFunctionBegin; 2578 n = 0; 2579 vertices = 0; 2580 if (pcbddc->ConstraintMatrix) { 2581 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2582 for (i=0;i<local_primal_size;i++) { 2583 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2584 if (size_of_constraint == 1) n++; 2585 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2586 } 2587 if (vertices_idx) { 2588 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2589 n = 0; 2590 for (i=0;i<local_primal_size;i++) { 2591 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2592 if (size_of_constraint == 1) { 2593 vertices[n++]=row_cmat_indices[0]; 2594 } 2595 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2596 } 2597 } 2598 } 2599 *n_vertices = n; 2600 if (vertices_idx) *vertices_idx = vertices; 2601 PetscFunctionReturn(0); 2602 } 2603 2604 #undef __FUNCT__ 2605 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2606 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2607 { 2608 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2609 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2610 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2611 PetscBT touched; 2612 PetscErrorCode ierr; 2613 2614 /* This function assumes that the number of local constraints per connected component 2615 is not greater than the number of nodes defined for the connected component 2616 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2617 PetscFunctionBegin; 2618 n = 0; 2619 constraints_index = 0; 2620 if (pcbddc->ConstraintMatrix) { 2621 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2622 max_size_of_constraint = 0; 2623 for (i=0;i<local_primal_size;i++) { 2624 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2625 if (size_of_constraint > 1) { 2626 n++; 2627 } 2628 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2629 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2630 } 2631 if (constraints_idx) { 2632 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2633 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2634 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2635 n = 0; 2636 for (i=0;i<local_primal_size;i++) { 2637 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2638 if (size_of_constraint > 1) { 2639 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2640 /* find first untouched local node */ 2641 j = 0; 2642 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2643 min_index = row_cmat_global_indices[j]; 2644 min_loc = j; 2645 /* search the minimum among nodes not yet touched on the connected component 2646 since there can be more than one constraint on a single cc */ 2647 for (j=1;j<size_of_constraint;j++) { 2648 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2649 min_index = row_cmat_global_indices[j]; 2650 min_loc = j; 2651 } 2652 } 2653 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2654 constraints_index[n++] = row_cmat_indices[min_loc]; 2655 } 2656 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2657 } 2658 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2659 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2660 } 2661 } 2662 *n_constraints = n; 2663 if (constraints_idx) *constraints_idx = constraints_index; 2664 PetscFunctionReturn(0); 2665 } 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "PCBDDCSubsetNumbering" 2669 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2670 { 2671 Vec local_vec,global_vec; 2672 IS seqis,paris; 2673 VecScatter scatter_ctx; 2674 PetscScalar *array; 2675 PetscInt *temp_global_dofs; 2676 PetscScalar globalsum; 2677 PetscInt i,j,s; 2678 PetscInt nlocals,first_index,old_index,max_local; 2679 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2680 PetscMPIInt *dof_sizes,*dof_displs; 2681 PetscBool first_found; 2682 PetscErrorCode ierr; 2683 2684 PetscFunctionBegin; 2685 /* mpi buffers */ 2686 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2687 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2688 j = ( !rank_prec_comm ? size_prec_comm : 0); 2689 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2690 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2691 /* get maximum size of subset */ 2692 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2693 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2694 max_local = 0; 2695 for (i=0;i<n_local_dofs;i++) { 2696 if (max_local < temp_global_dofs[i] ) { 2697 max_local = temp_global_dofs[i]; 2698 } 2699 } 2700 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2701 max_global++; 2702 max_local = 0; 2703 for (i=0;i<n_local_dofs;i++) { 2704 if (max_local < local_dofs[i] ) { 2705 max_local = local_dofs[i]; 2706 } 2707 } 2708 max_local++; 2709 /* allocate workspace */ 2710 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2711 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2712 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2713 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2714 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2715 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2716 /* create scatter */ 2717 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2718 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2719 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2720 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2721 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2722 /* init array */ 2723 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2724 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2725 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2726 if (local_dofs_mult) { 2727 for (i=0;i<n_local_dofs;i++) { 2728 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2729 } 2730 } else { 2731 for (i=0;i<n_local_dofs;i++) { 2732 array[local_dofs[i]]=1.0; 2733 } 2734 } 2735 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2736 /* scatter into global vec and get total number of global dofs */ 2737 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2738 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2739 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2740 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2741 /* Fill global_vec with cumulative function for global numbering */ 2742 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2743 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2744 nlocals = 0; 2745 first_index = -1; 2746 first_found = PETSC_FALSE; 2747 for (i=0;i<s;i++) { 2748 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2749 first_found = PETSC_TRUE; 2750 first_index = i; 2751 } 2752 nlocals += (PetscInt)PetscRealPart(array[i]); 2753 } 2754 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2755 if (!rank_prec_comm) { 2756 dof_displs[0]=0; 2757 for (i=1;i<size_prec_comm;i++) { 2758 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2759 } 2760 } 2761 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2762 if (first_found) { 2763 array[first_index] += (PetscScalar)nlocals; 2764 old_index = first_index; 2765 for (i=first_index+1;i<s;i++) { 2766 if (PetscRealPart(array[i]) > 0.1) { 2767 array[i] += array[old_index]; 2768 old_index = i; 2769 } 2770 } 2771 } 2772 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2773 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2774 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2775 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2776 /* get global ordering of local dofs */ 2777 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2778 if (local_dofs_mult) { 2779 for (i=0;i<n_local_dofs;i++) { 2780 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2781 } 2782 } else { 2783 for (i=0;i<n_local_dofs;i++) { 2784 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2785 } 2786 } 2787 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2788 /* free workspace */ 2789 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2790 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2791 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2792 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2793 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2794 /* return pointer to global ordering of local dofs */ 2795 *global_numbering_subset = temp_global_dofs; 2796 PetscFunctionReturn(0); 2797 } 2798 2799 #undef __FUNCT__ 2800 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2801 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2802 { 2803 PetscInt i,j; 2804 PetscScalar *alphas; 2805 PetscErrorCode ierr; 2806 2807 PetscFunctionBegin; 2808 /* this implements stabilized Gram-Schmidt */ 2809 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2810 for (i=0;i<n;i++) { 2811 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2812 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2813 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2814 } 2815 ierr = PetscFree(alphas);CHKERRQ(ierr); 2816 PetscFunctionReturn(0); 2817 } 2818 2819 #undef __FUNCT__ 2820 #define __FUNCT__ "MatISGetSubassemblingPattern" 2821 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2822 { 2823 Mat subdomain_adj; 2824 IS new_ranks,ranks_send_to; 2825 MatPartitioning partitioner; 2826 Mat_IS *matis; 2827 PetscInt n_neighs,*neighs,*n_shared,**shared; 2828 PetscInt prank; 2829 PetscMPIInt size,rank,color; 2830 PetscInt *xadj,*adjncy,*oldranks; 2831 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2832 PetscInt i,local_size,threshold=0; 2833 PetscErrorCode ierr; 2834 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2835 PetscSubcomm subcomm; 2836 2837 PetscFunctionBegin; 2838 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2839 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2840 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2841 2842 /* Get info on mapping */ 2843 matis = (Mat_IS*)(mat->data); 2844 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2845 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2846 2847 /* build local CSR graph of subdomains' connectivity */ 2848 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2849 xadj[0] = 0; 2850 xadj[1] = PetscMax(n_neighs-1,0); 2851 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2852 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2853 2854 if (threshold) { 2855 PetscInt xadj_count = 0; 2856 for (i=1;i<n_neighs;i++) { 2857 if (n_shared[i] > threshold) { 2858 adjncy[xadj_count] = neighs[i]; 2859 adjncy_wgt[xadj_count] = n_shared[i]; 2860 xadj_count++; 2861 } 2862 } 2863 xadj[1] = xadj_count; 2864 } else { 2865 if (xadj[1]) { 2866 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2867 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2868 } 2869 } 2870 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2871 if (use_square) { 2872 for (i=0;i<xadj[1];i++) { 2873 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2874 } 2875 } 2876 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2877 2878 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 2879 2880 /* 2881 Restrict work on active processes only. 2882 */ 2883 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2884 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2885 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2886 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2887 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2888 if (color) { 2889 ierr = PetscFree(xadj);CHKERRQ(ierr); 2890 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2891 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2892 } else { 2893 PetscInt coarsening_ratio; 2894 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2895 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2896 prank = rank; 2897 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2898 /* 2899 for (i=0;i<size;i++) { 2900 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2901 } 2902 */ 2903 for (i=0;i<xadj[1];i++) { 2904 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2905 } 2906 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2907 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2908 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2909 2910 /* Partition */ 2911 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2912 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2913 if (use_vwgt) { 2914 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 2915 v_wgt[0] = local_size; 2916 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2917 } 2918 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2919 coarsening_ratio = size/n_subdomains; 2920 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2921 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2922 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2923 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2924 2925 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2926 if (contiguous) { 2927 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2928 } else { 2929 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2930 } 2931 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2932 /* clean up */ 2933 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2934 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2935 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2936 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2937 } 2938 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2939 2940 /* assemble parallel IS for sends */ 2941 i = 1; 2942 if (color) i=0; 2943 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2944 2945 /* get back IS */ 2946 *is_sends = ranks_send_to; 2947 PetscFunctionReturn(0); 2948 } 2949 2950 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2951 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "MatISSubassemble" 2954 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2955 { 2956 Mat local_mat; 2957 Mat_IS *matis; 2958 IS is_sends_internal; 2959 PetscInt rows,cols,new_local_rows; 2960 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2961 PetscBool ismatis,isdense,newisdense,destroy_mat; 2962 ISLocalToGlobalMapping l2gmap; 2963 PetscInt* l2gmap_indices; 2964 const PetscInt* is_indices; 2965 MatType new_local_type; 2966 /* buffers */ 2967 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2968 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2969 PetscInt *recv_buffer_idxs_local; 2970 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2971 /* MPI */ 2972 MPI_Comm comm,comm_n; 2973 PetscSubcomm subcomm; 2974 PetscMPIInt n_sends,n_recvs,commsize; 2975 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2976 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2977 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2978 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2979 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2980 PetscErrorCode ierr; 2981 2982 PetscFunctionBegin; 2983 /* TODO: add missing checks */ 2984 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2985 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2986 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2987 PetscValidLogicalCollectiveInt(mat,nis,7); 2988 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2989 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2990 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2991 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2992 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2993 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2994 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2995 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2996 PetscInt mrows,mcols,mnrows,mncols; 2997 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2998 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2999 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3000 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3001 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3002 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3003 } 3004 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3005 PetscValidLogicalCollectiveInt(mat,bs,0); 3006 /* prepare IS for sending if not provided */ 3007 if (!is_sends) { 3008 PetscBool pcontig = PETSC_TRUE; 3009 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3010 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3011 } else { 3012 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3013 is_sends_internal = is_sends; 3014 } 3015 3016 /* get pointer of MATIS data */ 3017 matis = (Mat_IS*)mat->data; 3018 3019 /* get comm */ 3020 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3021 3022 /* compute number of sends */ 3023 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3024 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3025 3026 /* compute number of receives */ 3027 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3028 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3029 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3030 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3031 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3032 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3033 ierr = PetscFree(iflags);CHKERRQ(ierr); 3034 3035 /* restrict comm if requested */ 3036 subcomm = 0; 3037 destroy_mat = PETSC_FALSE; 3038 if (restrict_comm) { 3039 PetscMPIInt color,rank,subcommsize; 3040 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3041 color = 0; 3042 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3043 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3044 subcommsize = commsize - subcommsize; 3045 /* check if reuse has been requested */ 3046 if (reuse == MAT_REUSE_MATRIX) { 3047 if (*mat_n) { 3048 PetscMPIInt subcommsize2; 3049 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3050 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3051 comm_n = PetscObjectComm((PetscObject)*mat_n); 3052 } else { 3053 comm_n = PETSC_COMM_SELF; 3054 } 3055 } else { /* MAT_INITIAL_MATRIX */ 3056 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3057 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3058 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3059 comm_n = subcomm->comm; 3060 } 3061 /* flag to destroy *mat_n if not significative */ 3062 if (color) destroy_mat = PETSC_TRUE; 3063 } else { 3064 comm_n = comm; 3065 } 3066 3067 /* prepare send/receive buffers */ 3068 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3069 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3070 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3071 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3072 if (nis) { 3073 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3074 } 3075 3076 /* Get data from local matrices */ 3077 if (!isdense) { 3078 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3079 /* TODO: See below some guidelines on how to prepare the local buffers */ 3080 /* 3081 send_buffer_vals should contain the raw values of the local matrix 3082 send_buffer_idxs should contain: 3083 - MatType_PRIVATE type 3084 - PetscInt size_of_l2gmap 3085 - PetscInt global_row_indices[size_of_l2gmap] 3086 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3087 */ 3088 } else { 3089 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3090 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3091 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3092 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3093 send_buffer_idxs[1] = i; 3094 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3095 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3096 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3097 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3098 for (i=0;i<n_sends;i++) { 3099 ilengths_vals[is_indices[i]] = len*len; 3100 ilengths_idxs[is_indices[i]] = len+2; 3101 } 3102 } 3103 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3104 /* additional is (if any) */ 3105 if (nis) { 3106 PetscMPIInt psum; 3107 PetscInt j; 3108 for (j=0,psum=0;j<nis;j++) { 3109 PetscInt plen; 3110 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3111 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3112 psum += len+1; /* indices + lenght */ 3113 } 3114 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3115 for (j=0,psum=0;j<nis;j++) { 3116 PetscInt plen; 3117 const PetscInt *is_array_idxs; 3118 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3119 send_buffer_idxs_is[psum] = plen; 3120 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3121 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3122 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3123 psum += plen+1; /* indices + lenght */ 3124 } 3125 for (i=0;i<n_sends;i++) { 3126 ilengths_idxs_is[is_indices[i]] = psum; 3127 } 3128 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3129 } 3130 3131 buf_size_idxs = 0; 3132 buf_size_vals = 0; 3133 buf_size_idxs_is = 0; 3134 for (i=0;i<n_recvs;i++) { 3135 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3136 buf_size_vals += (PetscInt)olengths_vals[i]; 3137 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3138 } 3139 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3140 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3141 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3142 3143 /* get new tags for clean communications */ 3144 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3145 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3146 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3147 3148 /* allocate for requests */ 3149 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3150 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3151 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3152 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3153 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3154 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3155 3156 /* communications */ 3157 ptr_idxs = recv_buffer_idxs; 3158 ptr_vals = recv_buffer_vals; 3159 ptr_idxs_is = recv_buffer_idxs_is; 3160 for (i=0;i<n_recvs;i++) { 3161 source_dest = onodes[i]; 3162 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3163 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3164 ptr_idxs += olengths_idxs[i]; 3165 ptr_vals += olengths_vals[i]; 3166 if (nis) { 3167 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3168 ptr_idxs_is += olengths_idxs_is[i]; 3169 } 3170 } 3171 for (i=0;i<n_sends;i++) { 3172 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3173 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3174 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3175 if (nis) { 3176 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3177 } 3178 } 3179 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3180 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3181 3182 /* assemble new l2g map */ 3183 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3184 ptr_idxs = recv_buffer_idxs; 3185 new_local_rows = 0; 3186 for (i=0;i<n_recvs;i++) { 3187 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3188 ptr_idxs += olengths_idxs[i]; 3189 } 3190 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3191 ptr_idxs = recv_buffer_idxs; 3192 new_local_rows = 0; 3193 for (i=0;i<n_recvs;i++) { 3194 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3195 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3196 ptr_idxs += olengths_idxs[i]; 3197 } 3198 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3199 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3200 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3201 3202 /* infer new local matrix type from received local matrices type */ 3203 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3204 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3205 if (n_recvs) { 3206 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3207 ptr_idxs = recv_buffer_idxs; 3208 for (i=0;i<n_recvs;i++) { 3209 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3210 new_local_type_private = MATAIJ_PRIVATE; 3211 break; 3212 } 3213 ptr_idxs += olengths_idxs[i]; 3214 } 3215 switch (new_local_type_private) { 3216 case MATDENSE_PRIVATE: 3217 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3218 new_local_type = MATSEQAIJ; 3219 bs = 1; 3220 } else { /* if I receive only 1 dense matrix */ 3221 new_local_type = MATSEQDENSE; 3222 bs = 1; 3223 } 3224 break; 3225 case MATAIJ_PRIVATE: 3226 new_local_type = MATSEQAIJ; 3227 bs = 1; 3228 break; 3229 case MATBAIJ_PRIVATE: 3230 new_local_type = MATSEQBAIJ; 3231 break; 3232 case MATSBAIJ_PRIVATE: 3233 new_local_type = MATSEQSBAIJ; 3234 break; 3235 default: 3236 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3237 break; 3238 } 3239 } else { /* by default, new_local_type is seqdense */ 3240 new_local_type = MATSEQDENSE; 3241 bs = 1; 3242 } 3243 3244 /* create MATIS object if needed */ 3245 if (reuse == MAT_INITIAL_MATRIX) { 3246 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3247 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3248 } else { 3249 /* it also destroys the local matrices */ 3250 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3251 } 3252 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3253 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3254 3255 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3256 3257 /* Global to local map of received indices */ 3258 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3259 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3260 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3261 3262 /* restore attributes -> type of incoming data and its size */ 3263 buf_size_idxs = 0; 3264 for (i=0;i<n_recvs;i++) { 3265 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3266 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3267 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3268 } 3269 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3270 3271 /* set preallocation */ 3272 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3273 if (!newisdense) { 3274 PetscInt *new_local_nnz=0; 3275 3276 ptr_vals = recv_buffer_vals; 3277 ptr_idxs = recv_buffer_idxs_local; 3278 if (n_recvs) { 3279 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3280 } 3281 for (i=0;i<n_recvs;i++) { 3282 PetscInt j; 3283 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3284 for (j=0;j<*(ptr_idxs+1);j++) { 3285 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3286 } 3287 } else { 3288 /* TODO */ 3289 } 3290 ptr_idxs += olengths_idxs[i]; 3291 } 3292 if (new_local_nnz) { 3293 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3294 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3295 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3296 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3297 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3298 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3299 } else { 3300 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3301 } 3302 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3303 } else { 3304 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3305 } 3306 3307 /* set values */ 3308 ptr_vals = recv_buffer_vals; 3309 ptr_idxs = recv_buffer_idxs_local; 3310 for (i=0;i<n_recvs;i++) { 3311 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3312 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3313 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3314 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3315 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3316 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3317 } else { 3318 /* TODO */ 3319 } 3320 ptr_idxs += olengths_idxs[i]; 3321 ptr_vals += olengths_vals[i]; 3322 } 3323 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3324 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3325 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3326 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3327 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3328 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3329 3330 #if 0 3331 if (!restrict_comm) { /* check */ 3332 Vec lvec,rvec; 3333 PetscReal infty_error; 3334 3335 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3336 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3337 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3338 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3339 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3340 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3341 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3342 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3343 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3344 } 3345 #endif 3346 3347 /* assemble new additional is (if any) */ 3348 if (nis) { 3349 PetscInt **temp_idxs,*count_is,j,psum; 3350 3351 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3352 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3353 ptr_idxs = recv_buffer_idxs_is; 3354 psum = 0; 3355 for (i=0;i<n_recvs;i++) { 3356 for (j=0;j<nis;j++) { 3357 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3358 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3359 psum += plen; 3360 ptr_idxs += plen+1; /* shift pointer to received data */ 3361 } 3362 } 3363 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3364 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3365 for (i=1;i<nis;i++) { 3366 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3367 } 3368 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3369 ptr_idxs = recv_buffer_idxs_is; 3370 for (i=0;i<n_recvs;i++) { 3371 for (j=0;j<nis;j++) { 3372 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3373 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3374 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3375 ptr_idxs += plen+1; /* shift pointer to received data */ 3376 } 3377 } 3378 for (i=0;i<nis;i++) { 3379 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3380 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3381 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3382 } 3383 ierr = PetscFree(count_is);CHKERRQ(ierr); 3384 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3385 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3386 } 3387 /* free workspace */ 3388 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3389 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3390 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3391 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3392 if (isdense) { 3393 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3394 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3395 } else { 3396 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3397 } 3398 if (nis) { 3399 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3400 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3401 } 3402 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3403 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3404 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3405 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3406 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3407 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3408 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3409 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3410 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3411 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3412 ierr = PetscFree(onodes);CHKERRQ(ierr); 3413 if (nis) { 3414 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3415 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3416 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3417 } 3418 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3419 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3420 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3421 for (i=0;i<nis;i++) { 3422 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3423 } 3424 } 3425 PetscFunctionReturn(0); 3426 } 3427 3428 /* temporary hack into ksp private data structure */ 3429 #include <petsc-private/kspimpl.h> 3430 3431 #undef __FUNCT__ 3432 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3433 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3434 { 3435 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3436 PC_IS *pcis = (PC_IS*)pc->data; 3437 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3438 MatNullSpace CoarseNullSpace=NULL; 3439 ISLocalToGlobalMapping coarse_islg; 3440 IS coarse_is,*isarray; 3441 PetscInt i,im_active=-1,active_procs=-1; 3442 PetscInt nis,nisdofs,nisneu; 3443 PC pc_temp; 3444 PCType coarse_pc_type; 3445 KSPType coarse_ksp_type; 3446 PetscBool multilevel_requested,multilevel_allowed; 3447 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3448 Mat t_coarse_mat_is; 3449 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3450 PetscMPIInt all_procs; 3451 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3452 PetscBool compute_vecs = PETSC_FALSE; 3453 PetscScalar *array; 3454 PetscErrorCode ierr; 3455 3456 PetscFunctionBegin; 3457 /* Assign global numbering to coarse dofs */ 3458 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3459 compute_vecs = PETSC_TRUE; 3460 PetscInt ocoarse_size; 3461 ocoarse_size = pcbddc->coarse_size; 3462 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3463 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3464 /* see if we can avoid some work */ 3465 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3466 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3467 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3468 coarse_reuse = PETSC_FALSE; 3469 } else { /* we can safely reuse already computed coarse matrix */ 3470 coarse_reuse = PETSC_TRUE; 3471 } 3472 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3473 coarse_reuse = PETSC_FALSE; 3474 } 3475 /* reset any subassembling information */ 3476 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3477 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3478 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3479 coarse_reuse = PETSC_TRUE; 3480 } 3481 3482 /* count "active" (i.e. with positive local size) and "void" processes */ 3483 im_active = !!(pcis->n); 3484 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3485 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3486 void_procs = all_procs-active_procs; 3487 csin_type_simple = PETSC_TRUE; 3488 redist = PETSC_FALSE; 3489 if (pcbddc->current_level && void_procs) { 3490 csin_ml = PETSC_TRUE; 3491 ncoarse_ml = void_procs; 3492 csin_ds = PETSC_TRUE; 3493 ncoarse_ds = void_procs; 3494 } else { 3495 csin_ml = PETSC_FALSE; 3496 ncoarse_ml = all_procs; 3497 if (void_procs) { 3498 csin_ds = PETSC_TRUE; 3499 ncoarse_ds = void_procs; 3500 csin_type_simple = PETSC_FALSE; 3501 } else { 3502 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3503 csin_ds = PETSC_TRUE; 3504 ncoarse_ds = pcbddc->redistribute_coarse; 3505 redist = PETSC_TRUE; 3506 } else { 3507 csin_ds = PETSC_FALSE; 3508 ncoarse_ds = all_procs; 3509 } 3510 } 3511 } 3512 3513 /* 3514 test if we can go multilevel: three conditions must be satisfied: 3515 - we have not exceeded the number of levels requested 3516 - we can actually subassemble the active processes 3517 - we can find a suitable number of MPI processes where we can place the subassembled problem 3518 */ 3519 multilevel_allowed = PETSC_FALSE; 3520 multilevel_requested = PETSC_FALSE; 3521 if (pcbddc->current_level < pcbddc->max_levels) { 3522 multilevel_requested = PETSC_TRUE; 3523 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3524 multilevel_allowed = PETSC_FALSE; 3525 } else { 3526 multilevel_allowed = PETSC_TRUE; 3527 } 3528 } 3529 /* determine number of process partecipating to coarse solver */ 3530 if (multilevel_allowed) { 3531 ncoarse = ncoarse_ml; 3532 csin = csin_ml; 3533 } else { 3534 ncoarse = ncoarse_ds; 3535 csin = csin_ds; 3536 } 3537 3538 /* creates temporary l2gmap and IS for coarse indexes */ 3539 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3540 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3541 3542 /* creates temporary MATIS object for coarse matrix */ 3543 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3544 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3545 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3546 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3547 #if 0 3548 { 3549 PetscViewer viewer; 3550 char filename[256]; 3551 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3552 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3553 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3554 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3555 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3556 } 3557 #endif 3558 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3559 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3560 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3561 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3562 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3563 3564 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3565 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3566 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3567 const PetscInt *idxs; 3568 ISLocalToGlobalMapping tmap; 3569 3570 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3571 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3572 /* allocate space for temporary storage */ 3573 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3574 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3575 /* allocate for IS array */ 3576 nisdofs = pcbddc->n_ISForDofsLocal; 3577 nisneu = !!pcbddc->NeumannBoundariesLocal; 3578 nis = nisdofs + nisneu; 3579 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3580 /* dofs splitting */ 3581 for (i=0;i<nisdofs;i++) { 3582 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3583 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3584 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3585 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3586 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3587 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3588 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3589 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3590 } 3591 /* neumann boundaries */ 3592 if (pcbddc->NeumannBoundariesLocal) { 3593 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3594 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3595 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3596 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3597 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3598 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3599 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3600 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3601 } 3602 /* free memory */ 3603 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3604 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3605 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3606 } else { 3607 nis = 0; 3608 nisdofs = 0; 3609 nisneu = 0; 3610 isarray = NULL; 3611 } 3612 /* destroy no longer needed map */ 3613 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3614 3615 /* restrict on coarse candidates (if needed) */ 3616 coarse_mat_is = NULL; 3617 if (csin) { 3618 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3619 if (redist) { 3620 PetscMPIInt rank; 3621 PetscInt spc,n_spc_p1,dest[1]; 3622 3623 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3624 spc = all_procs/pcbddc->redistribute_coarse; 3625 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 3626 if (rank > n_spc_p1*(spc+1)-1) { 3627 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3628 } else { 3629 dest[0] = rank/(spc+1); 3630 } 3631 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3632 } else { 3633 PetscInt j,tissize,*nisindices; 3634 PetscInt *coarse_candidates; 3635 const PetscInt* tisindices; 3636 /* get coarse candidates' ranks in pc communicator */ 3637 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 3638 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3639 for (i=0,j=0;i<all_procs;i++) { 3640 if (!coarse_candidates[i]) { 3641 coarse_candidates[j]=i; 3642 j++; 3643 } 3644 } 3645 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3646 /* get a suitable subassembling pattern */ 3647 if (csin_type_simple) { 3648 PetscMPIInt rank; 3649 PetscInt issize,isidx; 3650 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3651 if (im_active) { 3652 issize = 1; 3653 isidx = (PetscInt)rank; 3654 } else { 3655 issize = 0; 3656 isidx = -1; 3657 } 3658 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3659 } else { 3660 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3661 } 3662 if (pcbddc->dbg_flag) { 3663 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3664 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3665 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3666 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3667 for (i=0;i<j;i++) { 3668 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3669 } 3670 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3671 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3672 } 3673 /* shift the pattern on coarse candidates */ 3674 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3675 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3676 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 3677 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3678 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3679 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3680 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3681 } 3682 } 3683 if (pcbddc->dbg_flag) { 3684 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3685 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3686 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3687 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3688 } 3689 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3690 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3691 } else { 3692 if (pcbddc->dbg_flag) { 3693 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3694 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3696 } 3697 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3698 coarse_mat_is = t_coarse_mat_is; 3699 } 3700 3701 /* create local to global scatters for coarse problem */ 3702 if (compute_vecs) { 3703 PetscInt lrows; 3704 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3705 if (coarse_mat_is) { 3706 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3707 } else { 3708 lrows = 0; 3709 } 3710 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3711 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3712 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3713 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3714 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3715 } 3716 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3717 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3718 3719 /* set defaults for coarse KSP and PC */ 3720 if (multilevel_allowed) { 3721 coarse_ksp_type = KSPRICHARDSON; 3722 coarse_pc_type = PCBDDC; 3723 } else { 3724 coarse_ksp_type = KSPPREONLY; 3725 coarse_pc_type = PCREDUNDANT; 3726 } 3727 3728 /* print some info if requested */ 3729 if (pcbddc->dbg_flag) { 3730 if (!multilevel_allowed) { 3731 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3732 if (multilevel_requested) { 3733 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3734 } else if (pcbddc->max_levels) { 3735 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3736 } 3737 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3738 } 3739 } 3740 3741 /* create the coarse KSP object only once with defaults */ 3742 if (coarse_mat_is) { 3743 MatReuse coarse_mat_reuse; 3744 PetscViewer dbg_viewer = NULL; 3745 if (pcbddc->dbg_flag) { 3746 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3747 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3748 } 3749 if (!pcbddc->coarse_ksp) { 3750 char prefix[256],str_level[16]; 3751 size_t len; 3752 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3753 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3754 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3755 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3756 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3757 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3758 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3759 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3760 /* prefix */ 3761 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3762 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3763 if (!pcbddc->current_level) { 3764 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3765 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3766 } else { 3767 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3768 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3769 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3770 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3771 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3772 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3773 } 3774 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3775 /* allow user customization */ 3776 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3777 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3778 } 3779 3780 /* get some info after set from options */ 3781 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3782 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3783 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3784 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3785 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3786 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3787 isbddc = PETSC_FALSE; 3788 } 3789 if (isredundant) { 3790 KSP inner_ksp; 3791 PC inner_pc; 3792 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3793 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3794 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3795 } 3796 3797 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3798 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3799 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3800 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3801 if (nisdofs) { 3802 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3803 for (i=0;i<nisdofs;i++) { 3804 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3805 } 3806 } 3807 if (nisneu) { 3808 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3809 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3810 } 3811 3812 /* assemble coarse matrix */ 3813 if (coarse_reuse) { 3814 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3815 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3816 coarse_mat_reuse = MAT_REUSE_MATRIX; 3817 } else { 3818 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3819 } 3820 if (isbddc || isnn) { 3821 if (pcbddc->coarsening_ratio > 1) { 3822 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3823 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3824 if (pcbddc->dbg_flag) { 3825 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3826 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3827 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3828 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3829 } 3830 } 3831 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3832 } else { 3833 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 3834 coarse_mat = coarse_mat_is; 3835 } 3836 } else { 3837 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3838 } 3839 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3840 3841 /* propagate symmetry info to coarse matrix */ 3842 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3843 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3844 3845 /* set operators */ 3846 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3847 if (pcbddc->dbg_flag) { 3848 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3849 } 3850 } else { /* processes non partecipating to coarse solver (if any) */ 3851 coarse_mat = 0; 3852 } 3853 ierr = PetscFree(isarray);CHKERRQ(ierr); 3854 #if 0 3855 { 3856 PetscViewer viewer; 3857 char filename[256]; 3858 sprintf(filename,"coarse_mat.m"); 3859 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3860 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3861 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3862 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3863 } 3864 #endif 3865 3866 /* Compute coarse null space (special handling by BDDC only) */ 3867 if (pcbddc->NullSpace) { 3868 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3869 } 3870 3871 if (pcbddc->coarse_ksp) { 3872 Vec crhs,csol; 3873 PetscBool ispreonly; 3874 if (CoarseNullSpace) { 3875 if (isbddc) { 3876 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3877 } else { 3878 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3879 } 3880 } 3881 /* setup coarse ksp */ 3882 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3883 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3884 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3885 /* hack */ 3886 if (!csol) { 3887 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3888 } 3889 if (!crhs) { 3890 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3891 } 3892 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3893 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3894 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3895 KSP check_ksp; 3896 KSPType check_ksp_type; 3897 PC check_pc; 3898 Vec check_vec,coarse_vec; 3899 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3900 PetscInt its; 3901 PetscBool compute_eigs; 3902 PetscReal *eigs_r,*eigs_c; 3903 PetscInt neigs; 3904 const char *prefix; 3905 3906 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3907 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3908 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3909 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3910 if (ispreonly) { 3911 check_ksp_type = KSPPREONLY; 3912 compute_eigs = PETSC_FALSE; 3913 } else { 3914 check_ksp_type = KSPGMRES; 3915 compute_eigs = PETSC_TRUE; 3916 } 3917 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3918 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3919 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3920 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3921 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3922 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3923 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3924 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3925 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3926 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3927 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3928 /* create random vec */ 3929 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3930 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3931 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3932 if (CoarseNullSpace) { 3933 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3934 } 3935 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3936 /* solve coarse problem */ 3937 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3938 if (CoarseNullSpace) { 3939 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3940 } 3941 /* set eigenvalue estimation if preonly has not been requested */ 3942 if (compute_eigs) { 3943 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 3944 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 3945 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3946 lambda_max = eigs_r[neigs-1]; 3947 lambda_min = eigs_r[0]; 3948 if (pcbddc->use_coarse_estimates) { 3949 if (lambda_max>lambda_min) { 3950 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3951 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3952 } 3953 } 3954 } 3955 3956 /* check coarse problem residual error */ 3957 if (pcbddc->dbg_flag) { 3958 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3959 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3960 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3961 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3962 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3963 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3964 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3965 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3966 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3967 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3968 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3969 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3970 if (compute_eigs) { 3971 PetscReal lambda_max_s,lambda_min_s; 3972 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 3973 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3974 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3975 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3976 for (i=0;i<neigs;i++) { 3977 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3978 } 3979 } 3980 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3981 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3982 } 3983 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3984 if (compute_eigs) { 3985 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3986 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3987 } 3988 } 3989 } 3990 /* print additional info */ 3991 if (pcbddc->dbg_flag) { 3992 /* waits until all processes reaches this point */ 3993 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3994 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3995 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3996 } 3997 3998 /* free memory */ 3999 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4000 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4001 PetscFunctionReturn(0); 4002 } 4003 4004 #undef __FUNCT__ 4005 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4006 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4007 { 4008 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4009 PC_IS* pcis = (PC_IS*)pc->data; 4010 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4011 PetscInt i,coarse_size; 4012 PetscInt *local_primal_indices; 4013 PetscErrorCode ierr; 4014 4015 PetscFunctionBegin; 4016 /* Compute global number of coarse dofs */ 4017 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4018 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4019 } 4020 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4021 4022 /* check numbering */ 4023 if (pcbddc->dbg_flag) { 4024 PetscScalar coarsesum,*array; 4025 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4026 4027 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4028 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4029 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4030 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4031 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4032 for (i=0;i<pcbddc->local_primal_size;i++) { 4033 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4034 } 4035 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4036 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4037 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4038 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4039 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4040 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4041 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4042 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4043 for (i=0;i<pcis->n;i++) { 4044 if (array[i] == 1.0) { 4045 set_error = PETSC_TRUE; 4046 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4047 } 4048 } 4049 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4050 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4051 for (i=0;i<pcis->n;i++) { 4052 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4053 } 4054 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4055 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4056 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4057 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4058 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4059 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4060 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4061 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4063 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4064 for (i=0;i<pcbddc->local_primal_size;i++) { 4065 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4066 } 4067 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4068 } 4069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4070 if (set_error_reduced) { 4071 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4072 } 4073 } 4074 /* get back data */ 4075 *coarse_size_n = coarse_size; 4076 *local_primal_indices_n = local_primal_indices; 4077 PetscFunctionReturn(0); 4078 } 4079 4080 #undef __FUNCT__ 4081 #define __FUNCT__ "PCBDDCGlobalToLocal" 4082 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4083 { 4084 IS localis_t; 4085 PetscInt i,lsize,*idxs,n; 4086 PetscScalar *vals; 4087 PetscErrorCode ierr; 4088 4089 PetscFunctionBegin; 4090 /* get indices in local ordering exploiting local to global map */ 4091 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4092 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4093 for (i=0;i<lsize;i++) vals[i] = 1.0; 4094 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4095 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4096 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4097 if (idxs) { /* multilevel guard */ 4098 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4099 } 4100 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4101 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4102 ierr = PetscFree(vals);CHKERRQ(ierr); 4103 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4104 /* now compute set in local ordering */ 4105 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4106 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4107 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4108 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4109 for (i=0,lsize=0;i<n;i++) { 4110 if (PetscRealPart(vals[i]) > 0.5) { 4111 lsize++; 4112 } 4113 } 4114 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4115 for (i=0,lsize=0;i<n;i++) { 4116 if (PetscRealPart(vals[i]) > 0.5) { 4117 idxs[lsize++] = i; 4118 } 4119 } 4120 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4121 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4122 *localis = localis_t; 4123 PetscFunctionReturn(0); 4124 } 4125 4126 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4127 #undef __FUNCT__ 4128 #define __FUNCT__ "PCBDDCMatMult_Private" 4129 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4130 { 4131 PCBDDCChange_ctx change_ctx; 4132 PetscErrorCode ierr; 4133 4134 PetscFunctionBegin; 4135 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4136 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4137 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4138 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4139 PetscFunctionReturn(0); 4140 } 4141 4142 #undef __FUNCT__ 4143 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4144 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4145 { 4146 PCBDDCChange_ctx change_ctx; 4147 PetscErrorCode ierr; 4148 4149 PetscFunctionBegin; 4150 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4151 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4152 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4153 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4154 PetscFunctionReturn(0); 4155 } 4156