1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #if 0 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCAdaptiveSelection" 11 PetscErrorCode PCBDDCAdaptiveSelection(PC pc, MatNullSpace* adapt_nnsp) 12 { 13 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs[0],pcbddc->mat_graph,...);CHKERRQ(ierr); 18 PetscFunctionReturn(0); 19 } 20 #endif 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "PCBDDCSetUpSolvers" 24 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 25 { 26 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 27 PetscScalar *coarse_submat_vals; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 /* Setup local scatters R_to_B and (optionally) R_to_D */ 32 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 33 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 34 35 /* Setup local neumann solver ksp_R */ 36 /* PCBDDCSetUpLocalScatters should be called first! */ 37 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 38 39 /* Change global null space passed in by the user if change of basis has been requested */ 40 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 41 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 42 } 43 44 /* 45 Setup local correction and local part of coarse basis. 46 Gives back the dense local part of the coarse matrix in column major ordering 47 */ 48 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 49 50 /* Compute total number of coarse nodes and setup coarse solver */ 51 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 52 53 /* free */ 54 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "PCBDDCResetCustomization" 60 PetscErrorCode PCBDDCResetCustomization(PC pc) 61 { 62 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 63 PetscErrorCode ierr; 64 65 PetscFunctionBegin; 66 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 67 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 68 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 69 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 70 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 71 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 72 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 73 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 74 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 75 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 76 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetTopography" 82 PetscErrorCode PCBDDCResetTopography(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 89 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 91 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 92 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs[0]);CHKERRQ(ierr); 93 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs[1]);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "PCBDDCResetSolvers" 99 PetscErrorCode PCBDDCResetSolvers(PC pc) 100 { 101 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 102 PetscErrorCode ierr; 103 104 PetscFunctionBegin; 105 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 106 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 110 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 111 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 112 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 113 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 114 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 115 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 116 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 117 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 118 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 119 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 120 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 121 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 122 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 123 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 124 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 125 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 126 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 127 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 128 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 134 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 135 { 136 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 137 PC_IS *pcis = (PC_IS*)pc->data; 138 VecType impVecType; 139 PetscInt n_constraints,n_R,old_size; 140 PetscErrorCode ierr; 141 142 PetscFunctionBegin; 143 if (!pcbddc->ConstraintMatrix) { 144 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 145 } 146 /* get sizes */ 147 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 148 n_R = pcis->n-pcbddc->n_actual_vertices; 149 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 150 /* local work vectors (try to avoid unneeded work)*/ 151 /* R nodes */ 152 old_size = -1; 153 if (pcbddc->vec1_R) { 154 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 155 } 156 if (n_R != old_size) { 157 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 158 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 159 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 160 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 161 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 162 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 163 } 164 /* local primal dofs */ 165 old_size = -1; 166 if (pcbddc->vec1_P) { 167 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 168 } 169 if (pcbddc->local_primal_size != old_size) { 170 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 171 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 172 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 173 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 174 } 175 /* local explicit constraints */ 176 old_size = -1; 177 if (pcbddc->vec1_C) { 178 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 179 } 180 if (n_constraints && n_constraints != old_size) { 181 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 182 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 183 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 184 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 185 } 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "PCBDDCSetUpCorrection" 191 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 192 { 193 PetscErrorCode ierr; 194 /* pointers to pcis and pcbddc */ 195 PC_IS* pcis = (PC_IS*)pc->data; 196 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 197 /* submatrices of local problem */ 198 Mat A_RV,A_VR,A_VV; 199 /* working matrices */ 200 Mat M1,M2,M3,C_CR; 201 /* working vectors */ 202 Vec vec1_C,vec2_C,vec1_V,vec2_V; 203 /* additional working stuff */ 204 IS is_aux; 205 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 206 const PetscScalar *array,*row_cmat_values; 207 const PetscInt *row_cmat_indices,*idx_R_local; 208 PetscInt *idx_V_B,*auxindices; 209 PetscInt n_vertices,n_constraints,size_of_constraint; 210 PetscInt i,j,n_R,n_D,n_B; 211 PetscBool unsymmetric_check; 212 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 213 MatType impMatType; 214 /* some shortcuts to scalars */ 215 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 216 /* for debugging purposes */ 217 PetscReal *coarsefunctions_errors,*constraints_errors; 218 219 PetscFunctionBegin; 220 /* get number of vertices (corners plus constraints with change of basis) 221 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 222 n_vertices = pcbddc->n_actual_vertices; 223 n_constraints = pcbddc->local_primal_size-n_vertices; 224 /* Set Non-overlapping dimensions */ 225 n_B = pcis->n_B; n_D = pcis->n - n_B; 226 n_R = pcis->n-n_vertices; 227 228 /* Set types for local objects needed by BDDC precondtioner */ 229 impMatType = MATSEQDENSE; 230 231 /* Allocating some extra storage just to be safe */ 232 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 233 for (i=0;i<pcis->n;i++) auxindices[i]=i; 234 235 /* vertices in boundary numbering */ 236 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 237 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 238 if (i != n_vertices) { 239 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 240 } 241 242 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 243 if (n_constraints) { 244 /* see if we can save some allocations */ 245 if (pcbddc->local_auxmat2) { 246 PetscInt on_R,on_constraints; 247 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 248 if (on_R != n_R || on_constraints != n_constraints) { 249 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 250 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 251 } 252 } 253 /* work vectors */ 254 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 255 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 256 /* auxiliary matrices */ 257 if (!pcbddc->local_auxmat2) { 258 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 259 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 260 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 261 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 262 } 263 264 /* Extract constraints on R nodes: C_{CR} */ 265 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 266 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 267 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 268 269 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 270 for (i=0;i<n_constraints;i++) { 271 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 272 /* Get row of constraint matrix in R numbering */ 273 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 274 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 275 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 276 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 277 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 278 /* Solve for row of constraint matrix in R numbering */ 279 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 280 /* Set values in local_auxmat2 */ 281 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 282 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 283 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 284 } 285 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 286 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 287 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 288 289 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 290 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 291 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 292 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 293 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 294 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 295 ierr = MatSetUp(M1);CHKERRQ(ierr); 296 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 297 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 298 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 299 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 300 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 301 ierr = MatDestroy(&M2);CHKERRQ(ierr); 302 ierr = MatDestroy(&M3);CHKERRQ(ierr); 303 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 304 if (!pcbddc->local_auxmat1) { 305 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 306 } else { 307 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 308 } 309 } 310 311 /* Get submatrices from subdomain matrix */ 312 if (n_vertices) { 313 PetscInt ibs,mbs; 314 PetscBool issbaij; 315 Mat newmat; 316 317 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 318 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 319 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 320 if (ibs != mbs) { /* need to convert to SEQAIJ */ 321 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 322 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 323 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 324 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 325 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 326 } else { 327 /* this is safe */ 328 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 329 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 330 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 331 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 332 /* which of the two approaches is faster? */ 333 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 334 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 335 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 336 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 337 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 338 } else { 339 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 340 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 341 } 342 } 343 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 344 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 345 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 346 } 347 348 /* Matrix of coarse basis functions (local) */ 349 if (pcbddc->coarse_phi_B) { 350 PetscInt on_B,on_primal; 351 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 352 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 353 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 355 } 356 } 357 if (pcbddc->coarse_phi_D) { 358 PetscInt on_D,on_primal; 359 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 360 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 361 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 362 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 363 } 364 } 365 if (!pcbddc->coarse_phi_B) { 366 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 367 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 368 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 369 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 370 } 371 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 372 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 373 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 374 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 375 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 376 } 377 378 if (pcbddc->dbg_flag) { 379 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 380 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 381 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 382 } 383 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 384 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 385 386 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 387 388 /* vertices */ 389 for (i=0;i<n_vertices;i++) { 390 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 391 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 392 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 393 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 394 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 395 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 396 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 397 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 398 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 399 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 400 if (n_constraints) { 401 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 402 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 403 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 404 } 405 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 406 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 407 408 /* Set values in coarse basis function and subdomain part of coarse_mat */ 409 /* coarse basis functions */ 410 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 411 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 412 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 413 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 414 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 416 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 417 if (pcbddc->switch_static || pcbddc->dbg_flag) { 418 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 419 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 420 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 421 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 423 } 424 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 425 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 426 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 428 if (n_constraints) { 429 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 430 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 432 } 433 434 /* check */ 435 if (pcbddc->dbg_flag) { 436 /* assemble subdomain vector on local nodes */ 437 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 438 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 439 if (n_R) { 440 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 441 } 442 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 443 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 444 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 445 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 446 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 447 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 448 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 449 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 450 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 451 if (n_constraints) { 452 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 453 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 454 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 455 } 456 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 457 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 458 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 459 /* check saddle point solution */ 460 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 461 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 462 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 463 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 464 /* shift by the identity matrix */ 465 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 466 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 468 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 469 } 470 } 471 472 /* constraints */ 473 for (i=0;i<n_constraints;i++) { 474 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 475 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 476 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 477 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 478 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 479 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 480 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 482 if (n_vertices) { 483 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 484 } 485 /* Set values in coarse basis function and subdomain part of coarse_mat */ 486 /* coarse basis functions */ 487 j = i+n_vertices; /* don't touch this! */ 488 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 489 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 490 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 491 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 492 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 493 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 494 if (pcbddc->switch_static || pcbddc->dbg_flag) { 495 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 496 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 497 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 498 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 499 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 500 } 501 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 502 if (n_vertices) { 503 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 504 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 505 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 506 } 507 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 508 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 510 511 if (pcbddc->dbg_flag) { 512 /* assemble subdomain vector on nodes */ 513 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 514 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 515 if (n_R) { 516 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 517 } 518 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 519 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 520 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 521 /* assemble subdomain vector of lagrange multipliers */ 522 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 523 if (n_vertices) { 524 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 525 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 526 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 527 } 528 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 529 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 530 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 531 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 532 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 533 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 534 /* check saddle point solution */ 535 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 536 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 537 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 538 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 539 /* shift by the identity matrix */ 540 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 541 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 542 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 543 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 544 } 545 } 546 /* call assembling routines for local coarse basis */ 547 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 548 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 549 if (pcbddc->switch_static || pcbddc->dbg_flag) { 550 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 } 553 554 /* compute other basis functions for non-symmetric problems */ 555 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 556 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 557 PetscBool setsym; 558 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 559 if (!setsym) pcbddc->issym = PETSC_FALSE; 560 } 561 562 if (!pcbddc->issym) { 563 if (!pcbddc->coarse_psi_B) { 564 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 565 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 566 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 567 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 568 } 569 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 570 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 571 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 572 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 573 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 574 } 575 for (i=0;i<pcbddc->local_primal_size;i++) { 576 if (n_constraints) { 577 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 578 for (j=0;j<n_constraints;j++) { 579 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 580 } 581 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 582 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 583 } 584 if (i<n_vertices) { 585 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 586 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 587 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 588 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 589 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 590 if (n_constraints) { 591 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 592 } 593 } else { 594 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 595 } 596 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 597 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 598 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 599 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 600 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 601 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 602 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 603 if (i<n_vertices) { 604 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 605 } 606 if (pcbddc->switch_static || pcbddc->dbg_flag) { 607 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 608 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 609 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 610 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 611 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 612 } 613 614 if (pcbddc->dbg_flag) { 615 /* assemble subdomain vector on nodes */ 616 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 617 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 618 if (n_R) { 619 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 620 } 621 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 622 if (i<n_vertices) { 623 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 624 } 625 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 626 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 627 /* assemble subdomain vector of lagrange multipliers */ 628 for (j=0;j<pcbddc->local_primal_size;j++) { 629 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 630 } 631 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 632 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 633 /* check saddle point solution */ 634 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 635 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 636 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 637 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 638 /* shift by the identity matrix */ 639 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 640 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 641 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 642 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 643 } 644 } 645 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 646 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 647 if (pcbddc->switch_static || pcbddc->dbg_flag) { 648 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 649 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 650 } 651 unsymmetric_check = PETSC_TRUE; 652 } else { /* take references to already computed coarse basis */ 653 unsymmetric_check = PETSC_FALSE; 654 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 655 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 656 if (pcbddc->coarse_phi_D) { 657 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 658 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 659 } 660 } 661 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 662 /* Checking coarse_sub_mat and coarse basis functios */ 663 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 664 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 665 if (pcbddc->dbg_flag) { 666 Mat coarse_sub_mat; 667 Mat AUXMAT,TM1,TM2,TM3,TM4; 668 Mat coarse_phi_D,coarse_phi_B; 669 Mat coarse_psi_D,coarse_psi_B; 670 Mat A_II,A_BB,A_IB,A_BI; 671 MatType checkmattype=MATSEQAIJ; 672 PetscReal real_value; 673 674 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 675 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 676 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 677 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 678 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 679 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 680 if (unsymmetric_check) { 681 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 682 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 683 } 684 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 685 686 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 687 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 688 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 689 if (unsymmetric_check) { 690 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 691 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 692 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 693 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 694 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 695 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 696 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 697 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 698 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 699 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 700 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 701 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 702 } else { 703 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 704 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 705 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 706 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 707 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 708 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 709 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 710 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 711 } 712 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 713 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 714 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 715 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 716 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 717 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 718 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 719 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 720 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 721 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 722 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 723 for (i=0;i<pcbddc->local_primal_size;i++) { 724 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 725 } 726 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 727 for (i=0;i<pcbddc->local_primal_size;i++) { 728 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 729 } 730 if (unsymmetric_check) { 731 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 732 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 733 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 734 } 735 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 736 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 737 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 738 } 739 } 740 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 741 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 742 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 743 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 744 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 745 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 746 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 747 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 748 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 749 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 750 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 751 if (unsymmetric_check) { 752 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 753 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 754 } 755 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 756 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 757 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 758 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 759 } 760 /* free memory */ 761 if (n_vertices) { 762 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 763 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 764 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 765 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 766 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 767 } 768 if (n_constraints) { 769 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 770 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 771 ierr = MatDestroy(&M1);CHKERRQ(ierr); 772 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 773 } 774 ierr = PetscFree(auxindices);CHKERRQ(ierr); 775 /* get back data */ 776 *coarse_submat_vals_n = coarse_submat_vals; 777 PetscFunctionReturn(0); 778 } 779 780 #undef __FUNCT__ 781 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 782 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 783 { 784 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 785 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 786 Mat *change_mat_all; 787 IS is_local,is_global; 788 PetscBool sorted,isseqaij; 789 PetscInt local_size,*idxs_perm; 790 PetscErrorCode ierr; 791 792 PetscFunctionBegin; 793 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 794 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 795 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 796 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 797 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 798 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 799 if (!sorted) { 800 const PetscInt *idxs; 801 PetscInt *idxs_sorted,i; 802 803 ierr = PetscMalloc1(local_size,&idxs_perm);CHKERRQ(ierr); 804 ierr = PetscMalloc1(local_size,&idxs_sorted);CHKERRQ(ierr); 805 for (i=0;i<local_size;i++) { 806 idxs_perm[i] = i; 807 } 808 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 809 ierr = PetscSortIntWithPermutation(local_size,idxs,idxs_perm);CHKERRQ(ierr); 810 for (i=0;i<local_size;i++) { 811 idxs_sorted[i] = idxs[idxs_perm[i]]; 812 } 813 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 814 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 815 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 816 } 817 818 /* get change of basis on the whole set of local dofs */ 819 ierr = MatGetSubMatrices(ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 820 821 if (!sorted) { 822 Mat new_mat; 823 IS is_perm; 824 PetscInt *idxs,i; 825 826 ierr = PetscMalloc1(local_size,&idxs);CHKERRQ(ierr); 827 for (i=0;i<local_size;i++) { 828 idxs[idxs_perm[i]] = i; 829 } 830 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 831 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 832 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 833 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 834 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 835 change_mat_all[0] = new_mat; 836 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 837 } 838 839 /* check */ 840 if (pcbddc->dbg_flag) { 841 Vec x,x_change; 842 PetscReal error; 843 844 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 845 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 846 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 847 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 848 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 849 ierr = MatMult(change_mat_all[0],matis->x,matis->y);CHKERRQ(ierr); 850 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 851 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 852 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 853 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 854 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 855 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 856 ierr = VecDestroy(&x);CHKERRQ(ierr); 857 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 858 } 859 860 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 861 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 862 if (isseqaij) { 863 ierr = MatPtAP(matis->A,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 864 } else { 865 Mat work_mat; 866 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 867 ierr = MatPtAP(work_mat,change_mat_all[0],MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 868 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 869 } 870 /* 871 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 872 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 873 */ 874 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 875 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } 878 879 #undef __FUNCT__ 880 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 881 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 882 { 883 PC_IS* pcis = (PC_IS*)(pc->data); 884 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 885 IS is_aux1,is_aux2; 886 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 887 PetscInt n_vertices,i,j,n_R,n_D,n_B; 888 PetscInt vbs,bs; 889 PetscBT bitmask; 890 PetscErrorCode ierr; 891 892 PetscFunctionBegin; 893 /* 894 No need to setup local scatters if 895 - primal space is unchanged 896 AND 897 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 898 AND 899 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 900 */ 901 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 902 PetscFunctionReturn(0); 903 } 904 /* destroy old objects */ 905 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 906 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 907 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 908 /* Set Non-overlapping dimensions */ 909 n_B = pcis->n_B; n_D = pcis->n - n_B; 910 n_vertices = pcbddc->n_actual_vertices; 911 /* create auxiliary bitmask */ 912 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 913 for (i=0;i<n_vertices;i++) { 914 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 915 } 916 917 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 918 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 919 for (i=0, n_R=0; i<pcis->n; i++) { 920 if (!PetscBTLookup(bitmask,i)) { 921 idx_R_local[n_R] = i; 922 n_R++; 923 } 924 } 925 926 /* Block code */ 927 vbs = 1; 928 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 929 if (bs>1 && !(n_vertices%bs)) { 930 PetscBool is_blocked = PETSC_TRUE; 931 PetscInt *vary; 932 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 933 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 934 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 935 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 936 for (i=0; i<n_vertices; i++) { 937 if (vary[i]!=0 && vary[i]!=bs) { 938 is_blocked = PETSC_FALSE; 939 break; 940 } 941 } 942 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 943 vbs = bs; 944 for (i=0;i<n_R/vbs;i++) { 945 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 946 } 947 } 948 ierr = PetscFree(vary);CHKERRQ(ierr); 949 } 950 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 951 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 952 953 /* print some info if requested */ 954 if (pcbddc->dbg_flag) { 955 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 956 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 958 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 959 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 960 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 961 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 962 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 963 } 964 965 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 966 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 967 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 968 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 969 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 970 for (i=0; i<n_D; i++) { 971 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 972 } 973 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 974 for (i=0, j=0; i<n_R; i++) { 975 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 976 aux_array1[j++] = i; 977 } 978 } 979 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 980 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 981 for (i=0, j=0; i<n_B; i++) { 982 if (!PetscBTLookup(bitmask,is_indices[i])) { 983 aux_array2[j++] = i; 984 } 985 } 986 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 987 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 988 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 989 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 990 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 991 992 if (pcbddc->switch_static || pcbddc->dbg_flag) { 993 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 994 for (i=0, j=0; i<n_R; i++) { 995 if (PetscBTLookup(bitmask,idx_R_local[i])) { 996 aux_array1[j++] = i; 997 } 998 } 999 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1000 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1001 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1002 } 1003 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1004 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1011 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1012 { 1013 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1014 PC_IS *pcis = (PC_IS*)pc->data; 1015 PC pc_temp; 1016 Mat A_RR; 1017 MatReuse reuse; 1018 PetscScalar m_one = -1.0; 1019 PetscReal value; 1020 PetscInt n_D,n_R,ibs,mbs; 1021 PetscBool use_exact,use_exact_reduced,issbaij; 1022 PetscErrorCode ierr; 1023 /* prefixes stuff */ 1024 char dir_prefix[256],neu_prefix[256],str_level[16]; 1025 size_t len; 1026 1027 PetscFunctionBegin; 1028 1029 /* compute prefixes */ 1030 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1031 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1032 if (!pcbddc->current_level) { 1033 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1034 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1035 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1036 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1037 } else { 1038 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1039 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1040 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1041 len -= 15; /* remove "pc_bddc_coarse_" */ 1042 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1043 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1044 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1045 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1046 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1047 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1048 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1049 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1050 } 1051 1052 /* DIRICHLET PROBLEM */ 1053 if (dirichlet) { 1054 /* Matrix for Dirichlet problem is pcis->A_II */ 1055 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1056 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1057 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1058 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1059 /* default */ 1060 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1061 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1062 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1063 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1064 if (issbaij) { 1065 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1066 } else { 1067 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1068 } 1069 /* Allow user's customization */ 1070 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1071 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1072 } 1073 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1074 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1075 if (!n_D) { 1076 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1077 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1078 } 1079 /* Set Up KSP for Dirichlet problem of BDDC */ 1080 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1081 /* set ksp_D into pcis data */ 1082 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1083 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1084 pcis->ksp_D = pcbddc->ksp_D; 1085 } 1086 1087 /* NEUMANN PROBLEM */ 1088 A_RR = 0; 1089 if (neumann) { 1090 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1091 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1092 if (pcbddc->ksp_R) { /* already created ksp */ 1093 PetscInt nn_R; 1094 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1095 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1096 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1097 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1098 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1099 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1100 reuse = MAT_INITIAL_MATRIX; 1101 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1102 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1103 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1104 reuse = MAT_INITIAL_MATRIX; 1105 } else { /* safe to reuse the matrix */ 1106 reuse = MAT_REUSE_MATRIX; 1107 } 1108 } 1109 /* last check */ 1110 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1111 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1112 reuse = MAT_INITIAL_MATRIX; 1113 } 1114 } else { /* first time, so we need to create the matrix */ 1115 reuse = MAT_INITIAL_MATRIX; 1116 } 1117 /* extract A_RR */ 1118 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1119 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1120 if (ibs != mbs) { 1121 Mat newmat; 1122 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1123 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1124 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1125 } else { 1126 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1127 } 1128 if (!pcbddc->ksp_R) { /* create object if not present */ 1129 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1130 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1131 /* default */ 1132 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1133 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1134 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1135 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1136 if (issbaij) { 1137 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1138 } else { 1139 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1140 } 1141 /* Allow user's customization */ 1142 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1143 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1144 } 1145 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1146 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1147 if (!n_R) { 1148 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1149 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1150 } 1151 /* Set Up KSP for Neumann problem of BDDC */ 1152 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1153 } 1154 1155 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1156 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1157 if (pcbddc->dbg_flag) { 1158 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1159 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1161 } 1162 if (dirichlet) { /* Dirichlet */ 1163 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1164 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1165 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1166 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1167 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1168 /* need to be adapted? */ 1169 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1170 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1171 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1172 /* print info */ 1173 if (pcbddc->dbg_flag) { 1174 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1175 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1176 } 1177 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1178 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1179 } 1180 } 1181 if (neumann) { /* Neumann */ 1182 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1183 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1184 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1185 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1186 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1187 /* need to be adapted? */ 1188 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1189 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1190 /* print info */ 1191 if (pcbddc->dbg_flag) { 1192 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1193 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1194 } 1195 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1196 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1197 } 1198 } 1199 } 1200 /* free Neumann problem's matrix */ 1201 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1207 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1208 { 1209 PetscErrorCode ierr; 1210 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1211 1212 PetscFunctionBegin; 1213 if (applytranspose) { 1214 if (pcbddc->local_auxmat1) { 1215 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1216 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1217 } 1218 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1219 } else { 1220 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1221 if (pcbddc->local_auxmat1) { 1222 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1223 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1224 } 1225 } 1226 PetscFunctionReturn(0); 1227 } 1228 1229 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1232 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1233 { 1234 PetscErrorCode ierr; 1235 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1236 PC_IS* pcis = (PC_IS*) (pc->data); 1237 const PetscScalar zero = 0.0; 1238 1239 PetscFunctionBegin; 1240 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1241 if (applytranspose) { 1242 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1243 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1244 } else { 1245 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1246 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1247 } 1248 /* start communications from local primal nodes to rhs of coarse solver */ 1249 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1250 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1251 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1252 1253 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1254 /* TODO remove null space when doing multilevel */ 1255 if (pcbddc->coarse_ksp) { 1256 if (applytranspose) { 1257 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1258 } else { 1259 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1260 } 1261 } 1262 1263 /* Local solution on R nodes */ 1264 if (pcis->n) { 1265 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1266 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1268 if (pcbddc->switch_static) { 1269 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1270 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1271 } 1272 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1273 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1274 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1275 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1276 if (pcbddc->switch_static) { 1277 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1279 } 1280 } 1281 1282 /* communications from coarse sol to local primal nodes */ 1283 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1284 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1285 1286 /* Sum contributions from two levels */ 1287 if (applytranspose) { 1288 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1289 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1290 } else { 1291 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1292 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1293 } 1294 PetscFunctionReturn(0); 1295 } 1296 1297 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1300 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1301 { 1302 PetscErrorCode ierr; 1303 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1304 PetscScalar *array,*array2; 1305 Vec from,to; 1306 1307 PetscFunctionBegin; 1308 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1309 from = pcbddc->coarse_vec; 1310 to = pcbddc->vec1_P; 1311 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1312 Vec tvec; 1313 PetscInt lsize; 1314 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1315 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1316 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1317 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1318 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1319 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1320 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1321 } 1322 } else { /* from local to global -> put data in coarse right hand side */ 1323 from = pcbddc->vec1_P; 1324 to = pcbddc->coarse_vec; 1325 } 1326 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 #undef __FUNCT__ 1331 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1332 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1333 { 1334 PetscErrorCode ierr; 1335 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1336 PetscScalar *array,*array2; 1337 Vec from,to; 1338 1339 PetscFunctionBegin; 1340 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1341 from = pcbddc->coarse_vec; 1342 to = pcbddc->vec1_P; 1343 } else { /* from local to global -> put data in coarse right hand side */ 1344 from = pcbddc->vec1_P; 1345 to = pcbddc->coarse_vec; 1346 } 1347 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1348 if (smode == SCATTER_FORWARD) { 1349 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1350 Vec tvec; 1351 PetscInt lsize; 1352 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1353 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1354 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1355 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1356 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1357 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1358 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1359 } 1360 } 1361 PetscFunctionReturn(0); 1362 } 1363 1364 /* uncomment for testing purposes */ 1365 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1368 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1369 { 1370 PetscErrorCode ierr; 1371 PC_IS* pcis = (PC_IS*)(pc->data); 1372 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1373 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1374 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1375 MatType impMatType=MATSEQAIJ; 1376 /* one and zero */ 1377 PetscScalar one=1.0,zero=0.0; 1378 /* space to store constraints and their local indices */ 1379 PetscScalar *temp_quadrature_constraint; 1380 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1381 /* iterators */ 1382 PetscInt i,j,k,total_counts,temp_start_ptr; 1383 /* stuff to store connected components stored in pcbddc->mat_graph */ 1384 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1385 PetscInt n_ISForFaces,n_ISForEdges; 1386 /* near null space stuff */ 1387 MatNullSpace nearnullsp; 1388 const Vec *nearnullvecs; 1389 Vec *localnearnullsp; 1390 PetscBool nnsp_has_cnst; 1391 PetscInt nnsp_size; 1392 PetscScalar *array; 1393 /* BLAS integers */ 1394 PetscBLASInt lwork,lierr; 1395 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1396 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1397 /* LAPACK working arrays for SVD or POD */ 1398 PetscBool skip_lapack; 1399 PetscScalar *work; 1400 PetscReal *singular_vals; 1401 #if defined(PETSC_USE_COMPLEX) 1402 PetscReal *rwork; 1403 #endif 1404 #if defined(PETSC_MISSING_LAPACK_GESVD) 1405 PetscBLASInt Blas_one_2=1; 1406 PetscScalar *temp_basis,*correlation_mat; 1407 #else 1408 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1409 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1410 #endif 1411 /* reuse */ 1412 PetscInt olocal_primal_size; 1413 PetscInt *oprimal_indices_local_idxs; 1414 /* change of basis */ 1415 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1416 PetscBool boolforchange,qr_needed; 1417 PetscBT touched,change_basis,qr_needed_idx; 1418 /* auxiliary stuff */ 1419 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1420 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1421 PetscScalar *temp_quadrature_constraint_work; 1422 /* some quantities */ 1423 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1424 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1425 1426 1427 PetscFunctionBegin; 1428 /* Destroy Mat objects computed previously */ 1429 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1430 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1431 /* Get index sets for faces, edges and vertices from graph */ 1432 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1433 /* free unneeded index sets */ 1434 if (!pcbddc->use_vertices) { 1435 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1436 } 1437 if (!pcbddc->use_edges) { 1438 for (i=0;i<n_ISForEdges;i++) { 1439 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1440 } 1441 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1442 n_ISForEdges = 0; 1443 } 1444 if (!pcbddc->use_faces) { 1445 for (i=0;i<n_ISForFaces;i++) { 1446 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1447 } 1448 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1449 n_ISForFaces = 0; 1450 } 1451 /* HACKS (the following two blocks of code) */ 1452 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1453 pcbddc->use_change_of_basis = PETSC_TRUE; 1454 if (!ISForEdges) { 1455 pcbddc->use_change_on_faces = PETSC_TRUE; 1456 } 1457 } 1458 if (pcbddc->NullSpace) { 1459 /* use_change_of_basis should be consistent among processors */ 1460 PetscBool tbool[2],gbool[2]; 1461 tbool [0] = pcbddc->use_change_of_basis; 1462 tbool [1] = pcbddc->use_change_on_faces; 1463 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1464 pcbddc->use_change_of_basis = gbool[0]; 1465 pcbddc->use_change_on_faces = gbool[1]; 1466 } 1467 /* print some info */ 1468 if (pcbddc->dbg_flag) { 1469 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1470 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1471 i = 0; 1472 if (ISForVertices) { 1473 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1474 } 1475 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1479 } 1480 /* check if near null space is attached to global mat */ 1481 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1482 if (nearnullsp) { 1483 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1484 /* remove any stored info */ 1485 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1486 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1487 /* store information for BDDC solver reuse */ 1488 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1489 pcbddc->onearnullspace = nearnullsp; 1490 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1491 for (i=0;i<nnsp_size;i++) { 1492 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1493 } 1494 } else { /* if near null space is not provided BDDC uses constants by default */ 1495 nnsp_size = 0; 1496 nnsp_has_cnst = PETSC_TRUE; 1497 } 1498 /* get max number of constraints on a single cc */ 1499 max_constraints = nnsp_size; 1500 if (nnsp_has_cnst) max_constraints++; 1501 1502 /* 1503 Evaluate maximum storage size needed by the procedure 1504 - temp_indices will contain start index of each constraint stored as follows 1505 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1506 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1507 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1508 */ 1509 total_counts = n_ISForFaces+n_ISForEdges; 1510 total_counts *= max_constraints; 1511 n_vertices = 0; 1512 if (ISForVertices) { 1513 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1514 } 1515 total_counts += n_vertices; 1516 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1517 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1518 total_counts = 0; 1519 max_size_of_constraint = 0; 1520 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1521 if (i<n_ISForEdges) { 1522 used_IS = &ISForEdges[i]; 1523 } else { 1524 used_IS = &ISForFaces[i-n_ISForEdges]; 1525 } 1526 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1527 total_counts += j; 1528 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1529 } 1530 total_counts *= max_constraints; 1531 total_counts += n_vertices; 1532 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1533 /* get local part of global near null space vectors */ 1534 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1535 for (k=0;k<nnsp_size;k++) { 1536 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1537 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1538 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1539 } 1540 1541 /* whether or not to skip lapack calls */ 1542 skip_lapack = PETSC_TRUE; 1543 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1544 1545 /* allocate some auxiliary stuff */ 1546 if (!skip_lapack || pcbddc->use_qr_single) { 1547 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1548 } 1549 1550 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1551 if (!skip_lapack) { 1552 PetscScalar temp_work; 1553 1554 #if defined(PETSC_MISSING_LAPACK_GESVD) 1555 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1556 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1557 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1558 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1559 #if defined(PETSC_USE_COMPLEX) 1560 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1561 #endif 1562 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1563 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1564 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1565 lwork = -1; 1566 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1567 #if !defined(PETSC_USE_COMPLEX) 1568 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1569 #else 1570 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1571 #endif 1572 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1573 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1574 #else /* on missing GESVD */ 1575 /* SVD */ 1576 PetscInt max_n,min_n; 1577 max_n = max_size_of_constraint; 1578 min_n = max_constraints; 1579 if (max_size_of_constraint < max_constraints) { 1580 min_n = max_size_of_constraint; 1581 max_n = max_constraints; 1582 } 1583 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1584 #if defined(PETSC_USE_COMPLEX) 1585 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1586 #endif 1587 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1588 lwork = -1; 1589 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1590 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1591 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1592 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1593 #if !defined(PETSC_USE_COMPLEX) 1594 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1595 #else 1596 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1597 #endif 1598 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1599 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1600 #endif /* on missing GESVD */ 1601 /* Allocate optimal workspace */ 1602 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1603 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1604 } 1605 /* Now we can loop on constraining sets */ 1606 total_counts = 0; 1607 temp_indices[0] = 0; 1608 /* vertices */ 1609 if (ISForVertices) { 1610 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1611 if (nnsp_has_cnst) { /* consider all vertices */ 1612 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1613 for (i=0;i<n_vertices;i++) { 1614 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1615 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1616 total_counts++; 1617 } 1618 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1619 PetscBool used_vertex; 1620 for (i=0;i<n_vertices;i++) { 1621 used_vertex = PETSC_FALSE; 1622 k = 0; 1623 while (!used_vertex && k<nnsp_size) { 1624 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1625 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1626 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1627 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1628 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1629 total_counts++; 1630 used_vertex = PETSC_TRUE; 1631 } 1632 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1633 k++; 1634 } 1635 } 1636 } 1637 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1638 n_vertices = total_counts; 1639 } 1640 1641 /* edges and faces */ 1642 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1643 if (ncc<n_ISForEdges) { 1644 used_IS = &ISForEdges[ncc]; 1645 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1646 } else { 1647 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1648 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1649 } 1650 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1651 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1652 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1653 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1654 /* change of basis should not be performed on local periodic nodes */ 1655 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1656 if (nnsp_has_cnst) { 1657 PetscScalar quad_value; 1658 temp_constraints++; 1659 if (!pcbddc->use_nnsp_true) { 1660 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1661 } else { 1662 quad_value = 1.0; 1663 } 1664 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1665 for (j=0;j<size_of_constraint;j++) { 1666 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1667 } 1668 /* sort by global ordering if using lapack subroutines */ 1669 if (!skip_lapack || pcbddc->use_qr_single) { 1670 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1671 for (j=0;j<size_of_constraint;j++) { 1672 permutation[j]=j; 1673 } 1674 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1675 for (j=0;j<size_of_constraint;j++) { 1676 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1677 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1678 } 1679 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1680 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1681 } 1682 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1683 total_counts++; 1684 } 1685 for (k=0;k<nnsp_size;k++) { 1686 PetscReal real_value; 1687 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1688 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1689 for (j=0;j<size_of_constraint;j++) { 1690 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1691 } 1692 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1693 /* check if array is null on the connected component */ 1694 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1695 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1696 if (real_value > 0.0) { /* keep indices and values */ 1697 /* sort by global ordering if using lapack subroutines */ 1698 if (!skip_lapack || pcbddc->use_qr_single) { 1699 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1700 for (j=0;j<size_of_constraint;j++) { 1701 permutation[j]=j; 1702 } 1703 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1704 for (j=0;j<size_of_constraint;j++) { 1705 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1706 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1707 } 1708 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1709 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1710 } 1711 temp_constraints++; 1712 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1713 total_counts++; 1714 } 1715 } 1716 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1717 valid_constraints = temp_constraints; 1718 if (!pcbddc->use_nnsp_true && temp_constraints) { 1719 if (temp_constraints == 1) { /* just normalize the constraint */ 1720 PetscScalar norm; 1721 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1722 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1723 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1724 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1725 } else { /* perform SVD */ 1726 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1727 1728 #if defined(PETSC_MISSING_LAPACK_GESVD) 1729 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1730 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1731 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1732 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1733 from that computed using LAPACKgesvd 1734 -> This is due to a different computation of eigenvectors in LAPACKheev 1735 -> The quality of the POD-computed basis will be the same */ 1736 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1737 /* Store upper triangular part of correlation matrix */ 1738 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1739 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1740 for (j=0;j<temp_constraints;j++) { 1741 for (k=0;k<j+1;k++) { 1742 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1743 } 1744 } 1745 /* compute eigenvalues and eigenvectors of correlation matrix */ 1746 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1747 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1748 #if !defined(PETSC_USE_COMPLEX) 1749 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1750 #else 1751 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1752 #endif 1753 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1754 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1755 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1756 j = 0; 1757 while (j < temp_constraints && singular_vals[j] < tol) j++; 1758 total_counts = total_counts-j; 1759 valid_constraints = temp_constraints-j; 1760 /* scale and copy POD basis into used quadrature memory */ 1761 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1762 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1763 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1764 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1765 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1766 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1767 if (j<temp_constraints) { 1768 PetscInt ii; 1769 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1770 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1771 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1772 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1773 for (k=0;k<temp_constraints-j;k++) { 1774 for (ii=0;ii<size_of_constraint;ii++) { 1775 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1776 } 1777 } 1778 } 1779 #else /* on missing GESVD */ 1780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1781 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1782 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1783 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1784 #if !defined(PETSC_USE_COMPLEX) 1785 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1786 #else 1787 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1788 #endif 1789 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1790 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1791 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1792 k = temp_constraints; 1793 if (k > size_of_constraint) k = size_of_constraint; 1794 j = 0; 1795 while (j < k && singular_vals[k-j-1] < tol) j++; 1796 valid_constraints = k-j; 1797 total_counts = total_counts-temp_constraints+valid_constraints; 1798 #endif /* on missing GESVD */ 1799 } 1800 } 1801 /* setting change_of_basis flag is safe now */ 1802 if (boolforchange) { 1803 for (j=0;j<valid_constraints;j++) { 1804 PetscBTSet(change_basis,total_counts-j-1); 1805 } 1806 } 1807 } 1808 /* free index sets of faces, edges and vertices */ 1809 for (i=0;i<n_ISForFaces;i++) { 1810 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1811 } 1812 if (n_ISForFaces) { 1813 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1814 } 1815 for (i=0;i<n_ISForEdges;i++) { 1816 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1817 } 1818 if (n_ISForEdges) { 1819 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1820 } 1821 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1822 /* map temp_indices_to_constraint in boundary numbering */ 1823 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1824 if (i != temp_indices[total_counts]) { 1825 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1826 } 1827 1828 /* free workspace */ 1829 if (!skip_lapack || pcbddc->use_qr_single) { 1830 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 1831 } 1832 if (!skip_lapack) { 1833 ierr = PetscFree(work);CHKERRQ(ierr); 1834 #if defined(PETSC_USE_COMPLEX) 1835 ierr = PetscFree(rwork);CHKERRQ(ierr); 1836 #endif 1837 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1838 #if defined(PETSC_MISSING_LAPACK_GESVD) 1839 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1840 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1841 #endif 1842 } 1843 for (k=0;k<nnsp_size;k++) { 1844 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1845 } 1846 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1847 1848 /* set quantities in pcbddc data structure and store previous primal size */ 1849 /* n_vertices defines the number of subdomain corners in the primal space */ 1850 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1851 olocal_primal_size = pcbddc->local_primal_size; 1852 pcbddc->local_primal_size = total_counts; 1853 pcbddc->n_vertices = n_vertices; 1854 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1855 1856 /* Create constraint matrix */ 1857 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1858 /* so we need to set it up properly either with or without change of basis */ 1859 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1860 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1861 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1862 /* array to compute a local numbering of constraints : vertices first then constraints */ 1863 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1864 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1865 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1866 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1867 /* auxiliary stuff for basis change */ 1868 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1869 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1870 1871 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1872 total_primal_vertices=0; 1873 for (i=0;i<pcbddc->local_primal_size;i++) { 1874 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1875 if (size_of_constraint == 1) { 1876 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1877 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1878 aux_primal_minloc[total_primal_vertices]=0; 1879 total_primal_vertices++; 1880 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1881 PetscInt min_loc,min_index; 1882 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1883 /* find first untouched local node */ 1884 k = 0; 1885 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1886 min_index = global_indices[k]; 1887 min_loc = k; 1888 /* search the minimum among global nodes already untouched on the cc */ 1889 for (k=1;k<size_of_constraint;k++) { 1890 /* there can be more than one constraint on a single connected component */ 1891 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1892 min_index = global_indices[k]; 1893 min_loc = k; 1894 } 1895 } 1896 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1897 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1898 aux_primal_minloc[total_primal_vertices]=min_loc; 1899 total_primal_vertices++; 1900 } 1901 } 1902 /* determine if a QR strategy is needed for change of basis */ 1903 qr_needed = PETSC_FALSE; 1904 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1905 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1906 if (PetscBTLookup(change_basis,i)) { 1907 if (!pcbddc->use_qr_single) { 1908 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1909 j = 0; 1910 for (k=0;k<size_of_constraint;k++) { 1911 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1912 j++; 1913 } 1914 } 1915 /* found more than one primal dof on the cc */ 1916 if (j > 1) { 1917 PetscBTSet(qr_needed_idx,i); 1918 qr_needed = PETSC_TRUE; 1919 } 1920 } else { 1921 PetscBTSet(qr_needed_idx,i); 1922 qr_needed = PETSC_TRUE; 1923 } 1924 } 1925 } 1926 /* free workspace */ 1927 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1928 1929 /* permute indices in order to have a sorted set of vertices */ 1930 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1931 1932 /* nonzero structure of constraint matrix */ 1933 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1934 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1935 j=total_primal_vertices; 1936 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1937 if (!PetscBTLookup(change_basis,i)) { 1938 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1939 j++; 1940 } 1941 } 1942 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1943 ierr = PetscFree(nnz);CHKERRQ(ierr); 1944 /* set values in constraint matrix */ 1945 for (i=0;i<total_primal_vertices;i++) { 1946 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1947 } 1948 total_counts = total_primal_vertices; 1949 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1950 if (!PetscBTLookup(change_basis,i)) { 1951 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1952 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1953 total_counts++; 1954 } 1955 } 1956 /* assembling */ 1957 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1958 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1959 /* 1960 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1961 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1962 */ 1963 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1964 if (pcbddc->use_change_of_basis) { 1965 /* dual and primal dofs on a single cc */ 1966 PetscInt dual_dofs,primal_dofs; 1967 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1968 PetscInt primal_counter; 1969 /* working stuff for GEQRF */ 1970 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1971 PetscBLASInt lqr_work; 1972 /* working stuff for UNGQR */ 1973 PetscScalar *gqr_work,lgqr_work_t; 1974 PetscBLASInt lgqr_work; 1975 /* working stuff for TRTRS */ 1976 PetscScalar *trs_rhs; 1977 PetscBLASInt Blas_NRHS; 1978 /* pointers for values insertion into change of basis matrix */ 1979 PetscInt *start_rows,*start_cols; 1980 PetscScalar *start_vals; 1981 /* working stuff for values insertion */ 1982 PetscBT is_primal; 1983 /* matrix sizes */ 1984 PetscInt global_size,local_size; 1985 /* work array for nonzeros */ 1986 PetscScalar *nnz_array; 1987 /* temporary change of basis */ 1988 Mat localChangeOfBasisMatrix; 1989 /* auxiliary work for global change of basis */ 1990 Vec nnz_vec; 1991 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 1992 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 1993 PetscScalar *vals; 1994 PetscBool done; 1995 1996 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 1997 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 1998 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1999 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2000 2001 /* nonzeros for local mat */ 2002 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 2003 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2004 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2005 if (PetscBTLookup(change_basis,i)) { 2006 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2007 if (PetscBTLookup(qr_needed_idx,i)) { 2008 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2009 } else { 2010 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 2011 /* get local primal index on the cc */ 2012 j = 0; 2013 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2014 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2015 } 2016 } 2017 } 2018 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2019 /* Set initial identity in the matrix */ 2020 for (i=0;i<pcis->n_B;i++) { 2021 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2022 } 2023 2024 if (pcbddc->dbg_flag) { 2025 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2026 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2027 } 2028 2029 2030 /* Now we loop on the constraints which need a change of basis */ 2031 /* 2032 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2033 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2034 2035 Basic blocks of change of basis matrix T computed by 2036 2037 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2038 2039 | 1 0 ... 0 s_1/S | 2040 | 0 1 ... 0 s_2/S | 2041 | ... | 2042 | 0 ... 1 s_{n-1}/S | 2043 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2044 2045 with S = \sum_{i=1}^n s_i^2 2046 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2047 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2048 2049 - QR decomposition of constraints otherwise 2050 */ 2051 if (qr_needed) { 2052 /* space to store Q */ 2053 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2054 /* first we issue queries for optimal work */ 2055 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2056 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2057 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2058 lqr_work = -1; 2059 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2060 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2061 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2062 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2063 lgqr_work = -1; 2064 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2065 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2066 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2067 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2068 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2069 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2070 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2071 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2072 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2073 /* array to store scaling factors for reflectors */ 2074 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2075 /* array to store rhs and solution of triangular solver */ 2076 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2077 /* allocating workspace for check */ 2078 if (pcbddc->dbg_flag) { 2079 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2080 } 2081 } 2082 /* array to store whether a node is primal or not */ 2083 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2084 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2085 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2086 if (i != total_primal_vertices) { 2087 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2088 } 2089 for (i=0;i<total_primal_vertices;i++) { 2090 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2091 } 2092 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2093 2094 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2095 /* -> using implicit ordering contained in temp_indices data */ 2096 total_counts = pcbddc->n_vertices; 2097 primal_counter = total_counts; 2098 while (total_counts<pcbddc->local_primal_size) { 2099 primal_dofs = 1; 2100 if (PetscBTLookup(change_basis,total_counts)) { 2101 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2102 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2103 primal_dofs++; 2104 } 2105 /* get constraint info */ 2106 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2107 dual_dofs = size_of_constraint-primal_dofs; 2108 2109 if (pcbddc->dbg_flag) { 2110 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2111 } 2112 2113 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2114 2115 /* copy quadrature constraints for change of basis check */ 2116 if (pcbddc->dbg_flag) { 2117 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2118 } 2119 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2120 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2121 2122 /* compute QR decomposition of constraints */ 2123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2124 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2126 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2127 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2128 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2129 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2130 2131 /* explictly compute R^-T */ 2132 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2133 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2134 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2135 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2136 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2137 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2138 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2139 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2140 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2141 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2142 2143 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2144 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2145 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2146 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2147 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2148 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2149 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2150 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2151 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2152 2153 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2154 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2155 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2156 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2157 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2158 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2159 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2160 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2161 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2162 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2163 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2164 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2165 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2166 2167 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2168 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2169 /* insert cols for primal dofs */ 2170 for (j=0;j<primal_dofs;j++) { 2171 start_vals = &qr_basis[j*size_of_constraint]; 2172 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2173 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2174 } 2175 /* insert cols for dual dofs */ 2176 for (j=0,k=0;j<dual_dofs;k++) { 2177 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2178 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2179 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2180 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2181 j++; 2182 } 2183 } 2184 2185 /* check change of basis */ 2186 if (pcbddc->dbg_flag) { 2187 PetscInt ii,jj; 2188 PetscBool valid_qr=PETSC_TRUE; 2189 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2190 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2191 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2192 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2193 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2194 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2195 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2196 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2197 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2198 for (jj=0;jj<size_of_constraint;jj++) { 2199 for (ii=0;ii<primal_dofs;ii++) { 2200 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2201 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2202 } 2203 } 2204 if (!valid_qr) { 2205 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2206 for (jj=0;jj<size_of_constraint;jj++) { 2207 for (ii=0;ii<primal_dofs;ii++) { 2208 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2209 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2210 } 2211 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2212 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2213 } 2214 } 2215 } 2216 } else { 2217 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2218 } 2219 } 2220 } else { /* simple transformation block */ 2221 PetscInt row,col; 2222 PetscScalar val,norm; 2223 2224 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2225 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2226 for (j=0;j<size_of_constraint;j++) { 2227 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2228 if (!PetscBTLookup(is_primal,row)) { 2229 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2230 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2231 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2232 } else { 2233 for (k=0;k<size_of_constraint;k++) { 2234 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2235 if (row != col) { 2236 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2237 } else { 2238 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2239 } 2240 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2241 } 2242 } 2243 } 2244 if (pcbddc->dbg_flag) { 2245 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2246 } 2247 } 2248 /* increment primal counter */ 2249 primal_counter += primal_dofs; 2250 } else { 2251 if (pcbddc->dbg_flag) { 2252 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2253 } 2254 } 2255 /* increment constraint counter total_counts */ 2256 total_counts += primal_dofs; 2257 } 2258 2259 /* free workspace */ 2260 if (qr_needed) { 2261 if (pcbddc->dbg_flag) { 2262 ierr = PetscFree(work);CHKERRQ(ierr); 2263 } 2264 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2265 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2266 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2267 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2268 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2269 } 2270 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2271 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2272 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2273 2274 /* assembling of global change of variable */ 2275 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2276 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2277 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2278 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2279 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2280 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2281 2282 /* nonzeros (overestimated) */ 2283 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2284 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2285 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2286 for (i=0;i<pcis->n;i++) { 2287 nnz_array[i] = 1.0; 2288 idxs_all[i] = i; 2289 } 2290 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2291 for (i=0;i<pcis->n_B;i++) { 2292 nnz_array[idxs_B[i]] = nnz[i]; 2293 } 2294 if (pcis->n) { 2295 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2296 } 2297 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2298 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2299 ierr = PetscFree(nnz);CHKERRQ(ierr); 2300 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2301 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2302 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2303 for (i=0;i<local_size;i++) { 2304 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2305 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2306 } 2307 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2308 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2309 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2310 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2311 2312 /* Set identity on dirichlet dofs */ 2313 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2314 for (i=0;i<pcis->n-pcis->n_B;i++) { 2315 PetscScalar one=1.0; 2316 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2317 } 2318 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2319 2320 /* Set values at interface dofs */ 2321 done = PETSC_TRUE; 2322 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2323 if (!done) { 2324 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2325 } 2326 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2327 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2328 ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2329 for (i=0;i<nvtxs;i++) { 2330 PetscInt row,*cols,ncols; 2331 PetscScalar *mat_vals; 2332 2333 row = idxs_B[i]; 2334 ncols = xadj[i+1]-xadj[i]; 2335 cols = idxs_mapped+xadj[i]; 2336 mat_vals = vals+xadj[i]; 2337 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2338 } 2339 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2340 if (!done) { 2341 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2342 } 2343 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2344 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2345 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2346 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2347 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2348 2349 /* check */ 2350 if (pcbddc->dbg_flag) { 2351 PetscReal error; 2352 Vec x,x_change; 2353 2354 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2355 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2356 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2357 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2358 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2359 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2360 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2361 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2362 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2363 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2364 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2365 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2366 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2367 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2368 ierr = VecDestroy(&x);CHKERRQ(ierr); 2369 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2370 } 2371 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2372 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2373 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2374 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2375 } 2376 2377 /* set up change of basis context */ 2378 if (pcbddc->ChangeOfBasisMatrix) { 2379 PCBDDCChange_ctx change_ctx; 2380 2381 if (!pcbddc->new_global_mat) { 2382 PetscInt global_size,local_size; 2383 2384 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2385 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2386 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2387 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2388 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2389 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2390 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2391 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2392 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2393 } else { 2394 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2395 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2396 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2397 } 2398 if (!pcbddc->user_ChangeOfBasisMatrix) { 2399 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2400 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2401 } else { 2402 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2403 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2404 } 2405 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2406 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2407 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2408 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2409 } 2410 2411 /* get indices in local ordering for vertices and constraints */ 2412 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2413 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2414 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2415 } 2416 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2417 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2418 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2419 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2420 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2421 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2422 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2423 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2424 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2425 /* set quantities in PCBDDC data struct */ 2426 pcbddc->n_actual_vertices = i; 2427 /* check if a new primal space has been introduced */ 2428 pcbddc->new_primal_space_local = PETSC_TRUE; 2429 if (olocal_primal_size == pcbddc->local_primal_size) { 2430 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2431 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2432 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2433 } 2434 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2435 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2436 2437 /* flush dbg viewer */ 2438 if (pcbddc->dbg_flag) { 2439 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2440 } 2441 2442 /* free workspace */ 2443 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2444 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2445 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2446 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2447 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2448 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2454 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2455 { 2456 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2457 PC_IS *pcis = (PC_IS*)pc->data; 2458 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2459 PetscInt ierr,i,vertex_size; 2460 PetscViewer viewer=pcbddc->dbg_viewer; 2461 2462 PetscFunctionBegin; 2463 /* Reset previously computed graph */ 2464 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2465 /* Init local Graph struct */ 2466 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2467 2468 /* Check validity of the csr graph passed in by the user */ 2469 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2470 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2471 } 2472 2473 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2474 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2475 Mat mat_adj; 2476 PetscInt *xadj,*adjncy; 2477 PetscInt nvtxs; 2478 PetscBool flg_row=PETSC_TRUE; 2479 2480 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2481 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2482 if (!flg_row) { 2483 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2484 } 2485 if (pcbddc->use_local_adj) { 2486 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2487 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2488 } else { /* just compute subdomain's connected components */ 2489 IS is_dummy; 2490 ISLocalToGlobalMapping l2gmap_dummy; 2491 PetscInt j,sum; 2492 PetscInt *cxadj,*cadjncy; 2493 const PetscInt *idxs; 2494 PCBDDCGraph graph; 2495 PetscBT is_on_boundary; 2496 2497 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2498 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2499 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2500 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2501 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2502 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2503 graph->xadj = xadj; 2504 graph->adjncy = adjncy; 2505 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2506 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2507 2508 if (pcbddc->dbg_flag) { 2509 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2510 for (i=0;i<graph->ncc;i++) { 2511 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2512 } 2513 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2514 } 2515 2516 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2517 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2518 for (i=0;i<pcis->n_B;i++) { 2519 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2520 } 2521 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2522 2523 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2524 sum = 0; 2525 for (i=0;i<graph->ncc;i++) { 2526 PetscInt sizecc = 0; 2527 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2528 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2529 sizecc++; 2530 } 2531 } 2532 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2533 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2534 cxadj[graph->queue[j]] = sizecc; 2535 } 2536 } 2537 sum += sizecc*sizecc; 2538 } 2539 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2540 sum = 0; 2541 for (i=0;i<nvtxs;i++) { 2542 PetscInt temp = cxadj[i]; 2543 cxadj[i] = sum; 2544 sum += temp; 2545 } 2546 cxadj[nvtxs] = sum; 2547 for (i=0;i<graph->ncc;i++) { 2548 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2549 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2550 PetscInt k,sizecc = 0; 2551 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2552 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2553 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2554 sizecc++; 2555 } 2556 } 2557 } 2558 } 2559 } 2560 if (nvtxs) { 2561 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2562 } else { 2563 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2564 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2565 } 2566 graph->xadj = 0; 2567 graph->adjncy = 0; 2568 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2569 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2570 } 2571 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2572 if (!flg_row) { 2573 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2574 } 2575 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2576 } 2577 2578 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2579 vertex_size = 1; 2580 if (pcbddc->user_provided_isfordofs) { 2581 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2582 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2583 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2584 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2585 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2586 } 2587 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2588 pcbddc->n_ISForDofs = 0; 2589 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2590 } 2591 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2592 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2593 } else { 2594 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2595 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2596 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2597 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2598 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2599 } 2600 } 2601 } 2602 2603 /* Setup of Graph */ 2604 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2605 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2606 } 2607 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2608 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2609 } 2610 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2611 2612 /* Graph's connected components analysis */ 2613 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2614 2615 /* print some info to stdout */ 2616 if (pcbddc->dbg_flag) { 2617 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2618 } 2619 2620 /* mark topography has done */ 2621 pcbddc->recompute_topography = PETSC_FALSE; 2622 PetscFunctionReturn(0); 2623 } 2624 2625 #undef __FUNCT__ 2626 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2627 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2628 { 2629 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2630 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2631 PetscErrorCode ierr; 2632 2633 PetscFunctionBegin; 2634 n = 0; 2635 vertices = 0; 2636 if (pcbddc->ConstraintMatrix) { 2637 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2638 for (i=0;i<local_primal_size;i++) { 2639 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2640 if (size_of_constraint == 1) n++; 2641 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2642 } 2643 if (vertices_idx) { 2644 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2645 n = 0; 2646 for (i=0;i<local_primal_size;i++) { 2647 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2648 if (size_of_constraint == 1) { 2649 vertices[n++]=row_cmat_indices[0]; 2650 } 2651 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2652 } 2653 } 2654 } 2655 *n_vertices = n; 2656 if (vertices_idx) *vertices_idx = vertices; 2657 PetscFunctionReturn(0); 2658 } 2659 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2662 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2663 { 2664 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2665 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2666 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2667 PetscBT touched; 2668 PetscErrorCode ierr; 2669 2670 /* This function assumes that the number of local constraints per connected component 2671 is not greater than the number of nodes defined for the connected component 2672 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2673 PetscFunctionBegin; 2674 n = 0; 2675 constraints_index = 0; 2676 if (pcbddc->ConstraintMatrix) { 2677 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2678 max_size_of_constraint = 0; 2679 for (i=0;i<local_primal_size;i++) { 2680 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2681 if (size_of_constraint > 1) { 2682 n++; 2683 } 2684 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2685 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2686 } 2687 if (constraints_idx) { 2688 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2689 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2690 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2691 n = 0; 2692 for (i=0;i<local_primal_size;i++) { 2693 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2694 if (size_of_constraint > 1) { 2695 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2696 /* find first untouched local node */ 2697 j = 0; 2698 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2699 min_index = row_cmat_global_indices[j]; 2700 min_loc = j; 2701 /* search the minimum among nodes not yet touched on the connected component 2702 since there can be more than one constraint on a single cc */ 2703 for (j=1;j<size_of_constraint;j++) { 2704 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2705 min_index = row_cmat_global_indices[j]; 2706 min_loc = j; 2707 } 2708 } 2709 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2710 constraints_index[n++] = row_cmat_indices[min_loc]; 2711 } 2712 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2713 } 2714 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2715 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2716 } 2717 } 2718 *n_constraints = n; 2719 if (constraints_idx) *constraints_idx = constraints_index; 2720 PetscFunctionReturn(0); 2721 } 2722 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "PCBDDCSubsetNumbering" 2725 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2726 { 2727 Vec local_vec,global_vec; 2728 IS seqis,paris; 2729 VecScatter scatter_ctx; 2730 PetscScalar *array; 2731 PetscInt *temp_global_dofs; 2732 PetscScalar globalsum; 2733 PetscInt i,j,s; 2734 PetscInt nlocals,first_index,old_index,max_local; 2735 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2736 PetscMPIInt *dof_sizes,*dof_displs; 2737 PetscBool first_found; 2738 PetscErrorCode ierr; 2739 2740 PetscFunctionBegin; 2741 /* mpi buffers */ 2742 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2743 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2744 j = ( !rank_prec_comm ? size_prec_comm : 0); 2745 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2746 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2747 /* get maximum size of subset */ 2748 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2749 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2750 max_local = 0; 2751 for (i=0;i<n_local_dofs;i++) { 2752 if (max_local < temp_global_dofs[i] ) { 2753 max_local = temp_global_dofs[i]; 2754 } 2755 } 2756 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2757 max_global++; 2758 max_local = 0; 2759 for (i=0;i<n_local_dofs;i++) { 2760 if (max_local < local_dofs[i] ) { 2761 max_local = local_dofs[i]; 2762 } 2763 } 2764 max_local++; 2765 /* allocate workspace */ 2766 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2767 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2768 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2769 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2770 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2771 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2772 /* create scatter */ 2773 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2774 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2775 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2776 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2777 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2778 /* init array */ 2779 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2780 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2781 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2782 if (local_dofs_mult) { 2783 for (i=0;i<n_local_dofs;i++) { 2784 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2785 } 2786 } else { 2787 for (i=0;i<n_local_dofs;i++) { 2788 array[local_dofs[i]]=1.0; 2789 } 2790 } 2791 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2792 /* scatter into global vec and get total number of global dofs */ 2793 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2794 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2795 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2796 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2797 /* Fill global_vec with cumulative function for global numbering */ 2798 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2799 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2800 nlocals = 0; 2801 first_index = -1; 2802 first_found = PETSC_FALSE; 2803 for (i=0;i<s;i++) { 2804 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2805 first_found = PETSC_TRUE; 2806 first_index = i; 2807 } 2808 nlocals += (PetscInt)PetscRealPart(array[i]); 2809 } 2810 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2811 if (!rank_prec_comm) { 2812 dof_displs[0]=0; 2813 for (i=1;i<size_prec_comm;i++) { 2814 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2815 } 2816 } 2817 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2818 if (first_found) { 2819 array[first_index] += (PetscScalar)nlocals; 2820 old_index = first_index; 2821 for (i=first_index+1;i<s;i++) { 2822 if (PetscRealPart(array[i]) > 0.1) { 2823 array[i] += array[old_index]; 2824 old_index = i; 2825 } 2826 } 2827 } 2828 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2829 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2830 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2831 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2832 /* get global ordering of local dofs */ 2833 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2834 if (local_dofs_mult) { 2835 for (i=0;i<n_local_dofs;i++) { 2836 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2837 } 2838 } else { 2839 for (i=0;i<n_local_dofs;i++) { 2840 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2841 } 2842 } 2843 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2844 /* free workspace */ 2845 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2846 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2847 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2848 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2849 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2850 /* return pointer to global ordering of local dofs */ 2851 *global_numbering_subset = temp_global_dofs; 2852 PetscFunctionReturn(0); 2853 } 2854 2855 #undef __FUNCT__ 2856 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2857 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2858 { 2859 PetscInt i,j; 2860 PetscScalar *alphas; 2861 PetscErrorCode ierr; 2862 2863 PetscFunctionBegin; 2864 /* this implements stabilized Gram-Schmidt */ 2865 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2866 for (i=0;i<n;i++) { 2867 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2868 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2869 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2870 } 2871 ierr = PetscFree(alphas);CHKERRQ(ierr); 2872 PetscFunctionReturn(0); 2873 } 2874 2875 #undef __FUNCT__ 2876 #define __FUNCT__ "MatISGetSubassemblingPattern" 2877 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2878 { 2879 Mat subdomain_adj; 2880 IS new_ranks,ranks_send_to; 2881 MatPartitioning partitioner; 2882 Mat_IS *matis; 2883 PetscInt n_neighs,*neighs,*n_shared,**shared; 2884 PetscInt prank; 2885 PetscMPIInt size,rank,color; 2886 PetscInt *xadj,*adjncy,*oldranks; 2887 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2888 PetscInt i,local_size,threshold=0; 2889 PetscErrorCode ierr; 2890 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2891 PetscSubcomm subcomm; 2892 2893 PetscFunctionBegin; 2894 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2895 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2896 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2897 2898 /* Get info on mapping */ 2899 matis = (Mat_IS*)(mat->data); 2900 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2901 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2902 2903 /* build local CSR graph of subdomains' connectivity */ 2904 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2905 xadj[0] = 0; 2906 xadj[1] = PetscMax(n_neighs-1,0); 2907 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2908 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2909 2910 if (threshold) { 2911 PetscInt xadj_count = 0; 2912 for (i=1;i<n_neighs;i++) { 2913 if (n_shared[i] > threshold) { 2914 adjncy[xadj_count] = neighs[i]; 2915 adjncy_wgt[xadj_count] = n_shared[i]; 2916 xadj_count++; 2917 } 2918 } 2919 xadj[1] = xadj_count; 2920 } else { 2921 if (xadj[1]) { 2922 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2923 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2924 } 2925 } 2926 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2927 if (use_square) { 2928 for (i=0;i<xadj[1];i++) { 2929 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2930 } 2931 } 2932 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2933 2934 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 2935 2936 /* 2937 Restrict work on active processes only. 2938 */ 2939 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2940 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2941 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2942 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2943 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2944 if (color) { 2945 ierr = PetscFree(xadj);CHKERRQ(ierr); 2946 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2947 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2948 } else { 2949 PetscInt coarsening_ratio; 2950 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2951 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2952 prank = rank; 2953 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2954 /* 2955 for (i=0;i<size;i++) { 2956 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2957 } 2958 */ 2959 for (i=0;i<xadj[1];i++) { 2960 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2961 } 2962 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2963 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2964 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2965 2966 /* Partition */ 2967 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2968 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2969 if (use_vwgt) { 2970 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 2971 v_wgt[0] = local_size; 2972 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2973 } 2974 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2975 coarsening_ratio = size/n_subdomains; 2976 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2977 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2978 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2979 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2980 2981 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2982 if (contiguous) { 2983 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2984 } else { 2985 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2986 } 2987 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2988 /* clean up */ 2989 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2990 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2991 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2992 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2993 } 2994 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2995 2996 /* assemble parallel IS for sends */ 2997 i = 1; 2998 if (color) i=0; 2999 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3000 3001 /* get back IS */ 3002 *is_sends = ranks_send_to; 3003 PetscFunctionReturn(0); 3004 } 3005 3006 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3007 3008 #undef __FUNCT__ 3009 #define __FUNCT__ "MatISSubassemble" 3010 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3011 { 3012 Mat local_mat; 3013 Mat_IS *matis; 3014 IS is_sends_internal; 3015 PetscInt rows,cols,new_local_rows; 3016 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3017 PetscBool ismatis,isdense,newisdense,destroy_mat; 3018 ISLocalToGlobalMapping l2gmap; 3019 PetscInt* l2gmap_indices; 3020 const PetscInt* is_indices; 3021 MatType new_local_type; 3022 /* buffers */ 3023 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3024 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3025 PetscInt *recv_buffer_idxs_local; 3026 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3027 /* MPI */ 3028 MPI_Comm comm,comm_n; 3029 PetscSubcomm subcomm; 3030 PetscMPIInt n_sends,n_recvs,commsize; 3031 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3032 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3033 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3034 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3035 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3036 PetscErrorCode ierr; 3037 3038 PetscFunctionBegin; 3039 /* TODO: add missing checks */ 3040 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3041 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3042 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3043 PetscValidLogicalCollectiveInt(mat,nis,7); 3044 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3045 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3046 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3047 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3048 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3049 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3050 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3051 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3052 PetscInt mrows,mcols,mnrows,mncols; 3053 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3054 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3055 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3056 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3057 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3058 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3059 } 3060 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3061 PetscValidLogicalCollectiveInt(mat,bs,0); 3062 /* prepare IS for sending if not provided */ 3063 if (!is_sends) { 3064 PetscBool pcontig = PETSC_TRUE; 3065 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3066 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3067 } else { 3068 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3069 is_sends_internal = is_sends; 3070 } 3071 3072 /* get pointer of MATIS data */ 3073 matis = (Mat_IS*)mat->data; 3074 3075 /* get comm */ 3076 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3077 3078 /* compute number of sends */ 3079 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3080 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3081 3082 /* compute number of receives */ 3083 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3084 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3085 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3086 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3087 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3088 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3089 ierr = PetscFree(iflags);CHKERRQ(ierr); 3090 3091 /* restrict comm if requested */ 3092 subcomm = 0; 3093 destroy_mat = PETSC_FALSE; 3094 if (restrict_comm) { 3095 PetscMPIInt color,rank,subcommsize; 3096 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3097 color = 0; 3098 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3099 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3100 subcommsize = commsize - subcommsize; 3101 /* check if reuse has been requested */ 3102 if (reuse == MAT_REUSE_MATRIX) { 3103 if (*mat_n) { 3104 PetscMPIInt subcommsize2; 3105 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3106 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3107 comm_n = PetscObjectComm((PetscObject)*mat_n); 3108 } else { 3109 comm_n = PETSC_COMM_SELF; 3110 } 3111 } else { /* MAT_INITIAL_MATRIX */ 3112 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3113 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3114 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3115 comm_n = subcomm->comm; 3116 } 3117 /* flag to destroy *mat_n if not significative */ 3118 if (color) destroy_mat = PETSC_TRUE; 3119 } else { 3120 comm_n = comm; 3121 } 3122 3123 /* prepare send/receive buffers */ 3124 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3125 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3126 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3127 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3128 if (nis) { 3129 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3130 } 3131 3132 /* Get data from local matrices */ 3133 if (!isdense) { 3134 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3135 /* TODO: See below some guidelines on how to prepare the local buffers */ 3136 /* 3137 send_buffer_vals should contain the raw values of the local matrix 3138 send_buffer_idxs should contain: 3139 - MatType_PRIVATE type 3140 - PetscInt size_of_l2gmap 3141 - PetscInt global_row_indices[size_of_l2gmap] 3142 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3143 */ 3144 } else { 3145 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3146 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3147 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3148 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3149 send_buffer_idxs[1] = i; 3150 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3151 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3152 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3153 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3154 for (i=0;i<n_sends;i++) { 3155 ilengths_vals[is_indices[i]] = len*len; 3156 ilengths_idxs[is_indices[i]] = len+2; 3157 } 3158 } 3159 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3160 /* additional is (if any) */ 3161 if (nis) { 3162 PetscMPIInt psum; 3163 PetscInt j; 3164 for (j=0,psum=0;j<nis;j++) { 3165 PetscInt plen; 3166 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3167 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3168 psum += len+1; /* indices + lenght */ 3169 } 3170 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3171 for (j=0,psum=0;j<nis;j++) { 3172 PetscInt plen; 3173 const PetscInt *is_array_idxs; 3174 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3175 send_buffer_idxs_is[psum] = plen; 3176 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3177 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3178 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3179 psum += plen+1; /* indices + lenght */ 3180 } 3181 for (i=0;i<n_sends;i++) { 3182 ilengths_idxs_is[is_indices[i]] = psum; 3183 } 3184 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3185 } 3186 3187 buf_size_idxs = 0; 3188 buf_size_vals = 0; 3189 buf_size_idxs_is = 0; 3190 for (i=0;i<n_recvs;i++) { 3191 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3192 buf_size_vals += (PetscInt)olengths_vals[i]; 3193 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3194 } 3195 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3196 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3197 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3198 3199 /* get new tags for clean communications */ 3200 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3201 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3202 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3203 3204 /* allocate for requests */ 3205 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3206 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3207 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3208 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3209 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3210 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3211 3212 /* communications */ 3213 ptr_idxs = recv_buffer_idxs; 3214 ptr_vals = recv_buffer_vals; 3215 ptr_idxs_is = recv_buffer_idxs_is; 3216 for (i=0;i<n_recvs;i++) { 3217 source_dest = onodes[i]; 3218 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3219 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3220 ptr_idxs += olengths_idxs[i]; 3221 ptr_vals += olengths_vals[i]; 3222 if (nis) { 3223 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3224 ptr_idxs_is += olengths_idxs_is[i]; 3225 } 3226 } 3227 for (i=0;i<n_sends;i++) { 3228 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3229 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3230 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3231 if (nis) { 3232 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3233 } 3234 } 3235 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3236 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3237 3238 /* assemble new l2g map */ 3239 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3240 ptr_idxs = recv_buffer_idxs; 3241 new_local_rows = 0; 3242 for (i=0;i<n_recvs;i++) { 3243 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3244 ptr_idxs += olengths_idxs[i]; 3245 } 3246 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3247 ptr_idxs = recv_buffer_idxs; 3248 new_local_rows = 0; 3249 for (i=0;i<n_recvs;i++) { 3250 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3251 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3252 ptr_idxs += olengths_idxs[i]; 3253 } 3254 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3255 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3256 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3257 3258 /* infer new local matrix type from received local matrices type */ 3259 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3260 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3261 if (n_recvs) { 3262 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3263 ptr_idxs = recv_buffer_idxs; 3264 for (i=0;i<n_recvs;i++) { 3265 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3266 new_local_type_private = MATAIJ_PRIVATE; 3267 break; 3268 } 3269 ptr_idxs += olengths_idxs[i]; 3270 } 3271 switch (new_local_type_private) { 3272 case MATDENSE_PRIVATE: 3273 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3274 new_local_type = MATSEQAIJ; 3275 bs = 1; 3276 } else { /* if I receive only 1 dense matrix */ 3277 new_local_type = MATSEQDENSE; 3278 bs = 1; 3279 } 3280 break; 3281 case MATAIJ_PRIVATE: 3282 new_local_type = MATSEQAIJ; 3283 bs = 1; 3284 break; 3285 case MATBAIJ_PRIVATE: 3286 new_local_type = MATSEQBAIJ; 3287 break; 3288 case MATSBAIJ_PRIVATE: 3289 new_local_type = MATSEQSBAIJ; 3290 break; 3291 default: 3292 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3293 break; 3294 } 3295 } else { /* by default, new_local_type is seqdense */ 3296 new_local_type = MATSEQDENSE; 3297 bs = 1; 3298 } 3299 3300 /* create MATIS object if needed */ 3301 if (reuse == MAT_INITIAL_MATRIX) { 3302 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3303 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3304 } else { 3305 /* it also destroys the local matrices */ 3306 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3307 } 3308 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3309 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3310 3311 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3312 3313 /* Global to local map of received indices */ 3314 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3315 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3316 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3317 3318 /* restore attributes -> type of incoming data and its size */ 3319 buf_size_idxs = 0; 3320 for (i=0;i<n_recvs;i++) { 3321 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3322 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3323 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3324 } 3325 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3326 3327 /* set preallocation */ 3328 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3329 if (!newisdense) { 3330 PetscInt *new_local_nnz=0; 3331 3332 ptr_vals = recv_buffer_vals; 3333 ptr_idxs = recv_buffer_idxs_local; 3334 if (n_recvs) { 3335 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3336 } 3337 for (i=0;i<n_recvs;i++) { 3338 PetscInt j; 3339 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3340 for (j=0;j<*(ptr_idxs+1);j++) { 3341 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3342 } 3343 } else { 3344 /* TODO */ 3345 } 3346 ptr_idxs += olengths_idxs[i]; 3347 } 3348 if (new_local_nnz) { 3349 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3350 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3351 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3352 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3353 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3354 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3355 } else { 3356 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3357 } 3358 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3359 } else { 3360 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3361 } 3362 3363 /* set values */ 3364 ptr_vals = recv_buffer_vals; 3365 ptr_idxs = recv_buffer_idxs_local; 3366 for (i=0;i<n_recvs;i++) { 3367 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3368 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3369 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3370 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3371 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3372 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3373 } else { 3374 /* TODO */ 3375 } 3376 ptr_idxs += olengths_idxs[i]; 3377 ptr_vals += olengths_vals[i]; 3378 } 3379 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3380 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3381 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3382 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3383 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3384 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3385 3386 #if 0 3387 if (!restrict_comm) { /* check */ 3388 Vec lvec,rvec; 3389 PetscReal infty_error; 3390 3391 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3392 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3393 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3394 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3395 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3396 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3397 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3398 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3399 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3400 } 3401 #endif 3402 3403 /* assemble new additional is (if any) */ 3404 if (nis) { 3405 PetscInt **temp_idxs,*count_is,j,psum; 3406 3407 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3408 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3409 ptr_idxs = recv_buffer_idxs_is; 3410 psum = 0; 3411 for (i=0;i<n_recvs;i++) { 3412 for (j=0;j<nis;j++) { 3413 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3414 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3415 psum += plen; 3416 ptr_idxs += plen+1; /* shift pointer to received data */ 3417 } 3418 } 3419 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3420 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3421 for (i=1;i<nis;i++) { 3422 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3423 } 3424 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3425 ptr_idxs = recv_buffer_idxs_is; 3426 for (i=0;i<n_recvs;i++) { 3427 for (j=0;j<nis;j++) { 3428 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3429 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3430 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3431 ptr_idxs += plen+1; /* shift pointer to received data */ 3432 } 3433 } 3434 for (i=0;i<nis;i++) { 3435 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3436 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3437 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3438 } 3439 ierr = PetscFree(count_is);CHKERRQ(ierr); 3440 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3441 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3442 } 3443 /* free workspace */ 3444 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3445 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3446 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3447 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3448 if (isdense) { 3449 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3450 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3451 } else { 3452 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3453 } 3454 if (nis) { 3455 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3456 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3457 } 3458 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3459 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3460 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3461 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3462 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3463 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3464 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3465 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3466 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3467 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3468 ierr = PetscFree(onodes);CHKERRQ(ierr); 3469 if (nis) { 3470 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3471 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3472 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3473 } 3474 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3475 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3476 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3477 for (i=0;i<nis;i++) { 3478 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3479 } 3480 } 3481 PetscFunctionReturn(0); 3482 } 3483 3484 /* temporary hack into ksp private data structure */ 3485 #include <petsc-private/kspimpl.h> 3486 3487 #undef __FUNCT__ 3488 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3489 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3490 { 3491 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3492 PC_IS *pcis = (PC_IS*)pc->data; 3493 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3494 MatNullSpace CoarseNullSpace=NULL; 3495 ISLocalToGlobalMapping coarse_islg; 3496 IS coarse_is,*isarray; 3497 PetscInt i,im_active=-1,active_procs=-1; 3498 PetscInt nis,nisdofs,nisneu; 3499 PC pc_temp; 3500 PCType coarse_pc_type; 3501 KSPType coarse_ksp_type; 3502 PetscBool multilevel_requested,multilevel_allowed; 3503 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3504 Mat t_coarse_mat_is; 3505 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3506 PetscMPIInt all_procs; 3507 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3508 PetscBool compute_vecs = PETSC_FALSE; 3509 PetscScalar *array; 3510 PetscErrorCode ierr; 3511 3512 PetscFunctionBegin; 3513 /* Assign global numbering to coarse dofs */ 3514 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3515 compute_vecs = PETSC_TRUE; 3516 PetscInt ocoarse_size; 3517 ocoarse_size = pcbddc->coarse_size; 3518 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3519 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3520 /* see if we can avoid some work */ 3521 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3522 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3523 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3524 coarse_reuse = PETSC_FALSE; 3525 } else { /* we can safely reuse already computed coarse matrix */ 3526 coarse_reuse = PETSC_TRUE; 3527 } 3528 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3529 coarse_reuse = PETSC_FALSE; 3530 } 3531 /* reset any subassembling information */ 3532 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3533 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3534 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3535 coarse_reuse = PETSC_TRUE; 3536 } 3537 3538 /* count "active" (i.e. with positive local size) and "void" processes */ 3539 im_active = !!(pcis->n); 3540 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3541 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3542 void_procs = all_procs-active_procs; 3543 csin_type_simple = PETSC_TRUE; 3544 redist = PETSC_FALSE; 3545 if (pcbddc->current_level && void_procs) { 3546 csin_ml = PETSC_TRUE; 3547 ncoarse_ml = void_procs; 3548 csin_ds = PETSC_TRUE; 3549 ncoarse_ds = void_procs; 3550 } else { 3551 csin_ml = PETSC_FALSE; 3552 ncoarse_ml = all_procs; 3553 if (void_procs) { 3554 csin_ds = PETSC_TRUE; 3555 ncoarse_ds = void_procs; 3556 csin_type_simple = PETSC_FALSE; 3557 } else { 3558 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3559 csin_ds = PETSC_TRUE; 3560 ncoarse_ds = pcbddc->redistribute_coarse; 3561 redist = PETSC_TRUE; 3562 } else { 3563 csin_ds = PETSC_FALSE; 3564 ncoarse_ds = all_procs; 3565 } 3566 } 3567 } 3568 3569 /* 3570 test if we can go multilevel: three conditions must be satisfied: 3571 - we have not exceeded the number of levels requested 3572 - we can actually subassemble the active processes 3573 - we can find a suitable number of MPI processes where we can place the subassembled problem 3574 */ 3575 multilevel_allowed = PETSC_FALSE; 3576 multilevel_requested = PETSC_FALSE; 3577 if (pcbddc->current_level < pcbddc->max_levels) { 3578 multilevel_requested = PETSC_TRUE; 3579 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3580 multilevel_allowed = PETSC_FALSE; 3581 } else { 3582 multilevel_allowed = PETSC_TRUE; 3583 } 3584 } 3585 /* determine number of process partecipating to coarse solver */ 3586 if (multilevel_allowed) { 3587 ncoarse = ncoarse_ml; 3588 csin = csin_ml; 3589 } else { 3590 ncoarse = ncoarse_ds; 3591 csin = csin_ds; 3592 } 3593 3594 /* creates temporary l2gmap and IS for coarse indexes */ 3595 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3596 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3597 3598 /* creates temporary MATIS object for coarse matrix */ 3599 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3600 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3601 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3602 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3603 #if 0 3604 { 3605 PetscViewer viewer; 3606 char filename[256]; 3607 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3608 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3609 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3610 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3611 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3612 } 3613 #endif 3614 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3615 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3616 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3617 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3618 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3619 3620 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3621 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3622 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3623 const PetscInt *idxs; 3624 ISLocalToGlobalMapping tmap; 3625 3626 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3627 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3628 /* allocate space for temporary storage */ 3629 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3630 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3631 /* allocate for IS array */ 3632 nisdofs = pcbddc->n_ISForDofsLocal; 3633 nisneu = !!pcbddc->NeumannBoundariesLocal; 3634 nis = nisdofs + nisneu; 3635 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3636 /* dofs splitting */ 3637 for (i=0;i<nisdofs;i++) { 3638 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3639 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3640 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3641 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3642 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3643 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3644 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3645 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3646 } 3647 /* neumann boundaries */ 3648 if (pcbddc->NeumannBoundariesLocal) { 3649 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3650 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3651 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3652 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3653 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3654 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3655 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3656 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3657 } 3658 /* free memory */ 3659 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3660 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3661 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3662 } else { 3663 nis = 0; 3664 nisdofs = 0; 3665 nisneu = 0; 3666 isarray = NULL; 3667 } 3668 /* destroy no longer needed map */ 3669 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3670 3671 /* restrict on coarse candidates (if needed) */ 3672 coarse_mat_is = NULL; 3673 if (csin) { 3674 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3675 if (redist) { 3676 PetscMPIInt rank; 3677 PetscInt spc,n_spc_p1,dest[1]; 3678 3679 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3680 spc = all_procs/pcbddc->redistribute_coarse; 3681 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 3682 if (rank > n_spc_p1*(spc+1)-1) { 3683 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3684 } else { 3685 dest[0] = rank/(spc+1); 3686 } 3687 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3688 } else { 3689 PetscInt j,tissize,*nisindices; 3690 PetscInt *coarse_candidates; 3691 const PetscInt* tisindices; 3692 /* get coarse candidates' ranks in pc communicator */ 3693 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 3694 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3695 for (i=0,j=0;i<all_procs;i++) { 3696 if (!coarse_candidates[i]) { 3697 coarse_candidates[j]=i; 3698 j++; 3699 } 3700 } 3701 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3702 /* get a suitable subassembling pattern */ 3703 if (csin_type_simple) { 3704 PetscMPIInt rank; 3705 PetscInt issize,isidx; 3706 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3707 if (im_active) { 3708 issize = 1; 3709 isidx = (PetscInt)rank; 3710 } else { 3711 issize = 0; 3712 isidx = -1; 3713 } 3714 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3715 } else { 3716 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3717 } 3718 if (pcbddc->dbg_flag) { 3719 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3720 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3721 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3722 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3723 for (i=0;i<j;i++) { 3724 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3725 } 3726 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3727 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3728 } 3729 /* shift the pattern on coarse candidates */ 3730 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3731 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3732 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 3733 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3734 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3735 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3736 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3737 } 3738 } 3739 if (pcbddc->dbg_flag) { 3740 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3741 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3742 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3743 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3744 } 3745 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3746 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3747 } else { 3748 if (pcbddc->dbg_flag) { 3749 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3750 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3751 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3752 } 3753 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3754 coarse_mat_is = t_coarse_mat_is; 3755 } 3756 3757 /* create local to global scatters for coarse problem */ 3758 if (compute_vecs) { 3759 PetscInt lrows; 3760 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3761 if (coarse_mat_is) { 3762 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3763 } else { 3764 lrows = 0; 3765 } 3766 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3767 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3768 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3769 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3770 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3771 } 3772 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3773 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3774 3775 /* set defaults for coarse KSP and PC */ 3776 if (multilevel_allowed) { 3777 coarse_ksp_type = KSPRICHARDSON; 3778 coarse_pc_type = PCBDDC; 3779 } else { 3780 coarse_ksp_type = KSPPREONLY; 3781 coarse_pc_type = PCREDUNDANT; 3782 } 3783 3784 /* print some info if requested */ 3785 if (pcbddc->dbg_flag) { 3786 if (!multilevel_allowed) { 3787 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3788 if (multilevel_requested) { 3789 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3790 } else if (pcbddc->max_levels) { 3791 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3792 } 3793 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3794 } 3795 } 3796 3797 /* create the coarse KSP object only once with defaults */ 3798 if (coarse_mat_is) { 3799 MatReuse coarse_mat_reuse; 3800 PetscViewer dbg_viewer = NULL; 3801 if (pcbddc->dbg_flag) { 3802 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3803 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3804 } 3805 if (!pcbddc->coarse_ksp) { 3806 char prefix[256],str_level[16]; 3807 size_t len; 3808 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3809 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3810 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3811 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3812 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3813 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3814 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3815 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3816 /* prefix */ 3817 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3818 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3819 if (!pcbddc->current_level) { 3820 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3821 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3822 } else { 3823 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3824 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3825 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3826 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3827 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3828 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3829 } 3830 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3831 /* allow user customization */ 3832 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3833 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3834 } 3835 3836 /* get some info after set from options */ 3837 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3838 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3839 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3840 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3841 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3842 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3843 isbddc = PETSC_FALSE; 3844 } 3845 if (isredundant) { 3846 KSP inner_ksp; 3847 PC inner_pc; 3848 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3849 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3850 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3851 } 3852 3853 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3854 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3855 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3856 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3857 if (nisdofs) { 3858 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3859 for (i=0;i<nisdofs;i++) { 3860 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3861 } 3862 } 3863 if (nisneu) { 3864 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3865 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3866 } 3867 3868 /* assemble coarse matrix */ 3869 if (coarse_reuse) { 3870 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3871 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3872 coarse_mat_reuse = MAT_REUSE_MATRIX; 3873 } else { 3874 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3875 } 3876 if (isbddc || isnn) { 3877 if (pcbddc->coarsening_ratio > 1) { 3878 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3879 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3880 if (pcbddc->dbg_flag) { 3881 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3882 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3883 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3884 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3885 } 3886 } 3887 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3888 } else { 3889 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 3890 coarse_mat = coarse_mat_is; 3891 } 3892 } else { 3893 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3894 } 3895 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3896 3897 /* propagate symmetry info to coarse matrix */ 3898 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3899 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3900 3901 /* set operators */ 3902 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3903 if (pcbddc->dbg_flag) { 3904 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3905 } 3906 } else { /* processes non partecipating to coarse solver (if any) */ 3907 coarse_mat = 0; 3908 } 3909 ierr = PetscFree(isarray);CHKERRQ(ierr); 3910 #if 0 3911 { 3912 PetscViewer viewer; 3913 char filename[256]; 3914 sprintf(filename,"coarse_mat.m"); 3915 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3916 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3917 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3918 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3919 } 3920 #endif 3921 3922 /* Compute coarse null space (special handling by BDDC only) */ 3923 if (pcbddc->NullSpace) { 3924 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3925 } 3926 3927 if (pcbddc->coarse_ksp) { 3928 Vec crhs,csol; 3929 PetscBool ispreonly; 3930 if (CoarseNullSpace) { 3931 if (isbddc) { 3932 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3933 } else { 3934 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3935 } 3936 } 3937 /* setup coarse ksp */ 3938 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3939 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3940 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3941 /* hack */ 3942 if (!csol) { 3943 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3944 } 3945 if (!crhs) { 3946 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3947 } 3948 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3949 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3950 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3951 KSP check_ksp; 3952 KSPType check_ksp_type; 3953 PC check_pc; 3954 Vec check_vec,coarse_vec; 3955 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3956 PetscInt its; 3957 PetscBool compute_eigs; 3958 PetscReal *eigs_r,*eigs_c; 3959 PetscInt neigs; 3960 const char *prefix; 3961 3962 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3963 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3964 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3965 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3966 if (ispreonly) { 3967 check_ksp_type = KSPPREONLY; 3968 compute_eigs = PETSC_FALSE; 3969 } else { 3970 check_ksp_type = KSPGMRES; 3971 compute_eigs = PETSC_TRUE; 3972 } 3973 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3974 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3975 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3976 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3977 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3978 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3979 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3980 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3981 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3982 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3983 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3984 /* create random vec */ 3985 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3986 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3987 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3988 if (CoarseNullSpace) { 3989 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3990 } 3991 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3992 /* solve coarse problem */ 3993 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3994 if (CoarseNullSpace) { 3995 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3996 } 3997 /* set eigenvalue estimation if preonly has not been requested */ 3998 if (compute_eigs) { 3999 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4000 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4001 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4002 lambda_max = eigs_r[neigs-1]; 4003 lambda_min = eigs_r[0]; 4004 if (pcbddc->use_coarse_estimates) { 4005 if (lambda_max>lambda_min) { 4006 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4007 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4008 } 4009 } 4010 } 4011 4012 /* check coarse problem residual error */ 4013 if (pcbddc->dbg_flag) { 4014 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4015 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4016 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4017 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4018 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4019 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4020 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4021 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4022 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4023 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4024 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4025 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4026 if (compute_eigs) { 4027 PetscReal lambda_max_s,lambda_min_s; 4028 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4029 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4030 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4031 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4032 for (i=0;i<neigs;i++) { 4033 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4034 } 4035 } 4036 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4037 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4038 } 4039 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4040 if (compute_eigs) { 4041 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4042 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4043 } 4044 } 4045 } 4046 /* print additional info */ 4047 if (pcbddc->dbg_flag) { 4048 /* waits until all processes reaches this point */ 4049 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4050 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4051 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4052 } 4053 4054 /* free memory */ 4055 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4056 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4057 PetscFunctionReturn(0); 4058 } 4059 4060 #undef __FUNCT__ 4061 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4062 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4063 { 4064 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4065 PC_IS* pcis = (PC_IS*)pc->data; 4066 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4067 PetscInt i,coarse_size; 4068 PetscInt *local_primal_indices; 4069 PetscErrorCode ierr; 4070 4071 PetscFunctionBegin; 4072 /* Compute global number of coarse dofs */ 4073 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4074 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4075 } 4076 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4077 4078 /* check numbering */ 4079 if (pcbddc->dbg_flag) { 4080 PetscScalar coarsesum,*array; 4081 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4082 4083 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4084 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4085 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4086 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4087 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4088 for (i=0;i<pcbddc->local_primal_size;i++) { 4089 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4090 } 4091 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4092 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4093 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4094 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4095 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4096 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4097 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4098 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4099 for (i=0;i<pcis->n;i++) { 4100 if (array[i] == 1.0) { 4101 set_error = PETSC_TRUE; 4102 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4103 } 4104 } 4105 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4106 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4107 for (i=0;i<pcis->n;i++) { 4108 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4109 } 4110 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4111 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4112 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4113 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4114 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4115 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4116 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4117 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4118 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4119 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4120 for (i=0;i<pcbddc->local_primal_size;i++) { 4121 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4122 } 4123 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4124 } 4125 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4126 if (set_error_reduced) { 4127 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4128 } 4129 } 4130 /* get back data */ 4131 *coarse_size_n = coarse_size; 4132 *local_primal_indices_n = local_primal_indices; 4133 PetscFunctionReturn(0); 4134 } 4135 4136 #undef __FUNCT__ 4137 #define __FUNCT__ "PCBDDCGlobalToLocal" 4138 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4139 { 4140 IS localis_t; 4141 PetscInt i,lsize,*idxs,n; 4142 PetscScalar *vals; 4143 PetscErrorCode ierr; 4144 4145 PetscFunctionBegin; 4146 /* get indices in local ordering exploiting local to global map */ 4147 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4148 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4149 for (i=0;i<lsize;i++) vals[i] = 1.0; 4150 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4151 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4152 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4153 if (idxs) { /* multilevel guard */ 4154 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4155 } 4156 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4157 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4158 ierr = PetscFree(vals);CHKERRQ(ierr); 4159 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4160 /* now compute set in local ordering */ 4161 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4162 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4163 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4164 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4165 for (i=0,lsize=0;i<n;i++) { 4166 if (PetscRealPart(vals[i]) > 0.5) { 4167 lsize++; 4168 } 4169 } 4170 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4171 for (i=0,lsize=0;i<n;i++) { 4172 if (PetscRealPart(vals[i]) > 0.5) { 4173 idxs[lsize++] = i; 4174 } 4175 } 4176 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4177 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4178 *localis = localis_t; 4179 PetscFunctionReturn(0); 4180 } 4181 4182 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4183 #undef __FUNCT__ 4184 #define __FUNCT__ "PCBDDCMatMult_Private" 4185 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4186 { 4187 PCBDDCChange_ctx change_ctx; 4188 PetscErrorCode ierr; 4189 4190 PetscFunctionBegin; 4191 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4192 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4193 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4194 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4195 PetscFunctionReturn(0); 4196 } 4197 4198 #undef __FUNCT__ 4199 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4200 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4201 { 4202 PCBDDCChange_ctx change_ctx; 4203 PetscErrorCode ierr; 4204 4205 PetscFunctionBegin; 4206 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4207 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4208 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4209 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4210 PetscFunctionReturn(0); 4211 } 4212