1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCSetUpSolvers" 10 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscScalar *coarse_submat_vals; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 /* Compute matrix after change of basis and extract local submatrices */ 18 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D */ 21 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 22 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 23 24 /* Setup local solvers ksp_D and ksp_R */ 25 /* PCBDDCSetUpLocalScatters should be called first! */ 26 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 27 28 /* Change global null space passed in by the user if change of basis has been requested */ 29 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 30 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 31 } 32 33 /* 34 Setup local correction and local part of coarse basis. 35 Gives back the dense local part of the coarse matrix in column major ordering 36 */ 37 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 38 39 /* Compute total number of coarse nodes and setup coarse solver */ 40 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 41 42 /* free */ 43 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 #undef __FUNCT__ 48 #define __FUNCT__ "PCBDDCResetCustomization" 49 PetscErrorCode PCBDDCResetCustomization(PC pc) 50 { 51 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 61 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 62 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 63 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 64 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 65 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCResetTopography" 71 PetscErrorCode PCBDDCResetTopography(PC pc) 72 { 73 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 78 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 79 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 80 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "PCBDDCResetSolvers" 86 PetscErrorCode PCBDDCResetSolvers(PC pc) 87 { 88 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 93 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 101 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 109 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 110 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 111 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 112 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 113 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 114 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 115 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 122 { 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PC_IS *pcis = (PC_IS*)pc->data; 125 VecType impVecType; 126 PetscInt n_constraints,n_R,old_size; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 if (!pcbddc->ConstraintMatrix) { 131 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 132 } 133 /* get sizes */ 134 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 135 n_R = pcis->n-pcbddc->n_actual_vertices; 136 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 137 /* local work vectors (try to avoid unneeded work)*/ 138 /* R nodes */ 139 old_size = -1; 140 if (pcbddc->vec1_R) { 141 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 142 } 143 if (n_R != old_size) { 144 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 146 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 147 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 148 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 149 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 150 } 151 /* local primal dofs */ 152 old_size = -1; 153 if (pcbddc->vec1_P) { 154 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 155 } 156 if (pcbddc->local_primal_size != old_size) { 157 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 158 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 159 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 160 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 161 } 162 /* local explicit constraints */ 163 old_size = -1; 164 if (pcbddc->vec1_C) { 165 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 166 } 167 if (n_constraints && n_constraints != old_size) { 168 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 169 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 170 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 171 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCBDDCSetUpCorrection" 178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 179 { 180 PetscErrorCode ierr; 181 /* pointers to pcis and pcbddc */ 182 PC_IS* pcis = (PC_IS*)pc->data; 183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 184 /* submatrices of local problem */ 185 Mat A_RV,A_VR,A_VV; 186 /* working matrices */ 187 Mat M1,M2,M3,C_CR; 188 /* working vectors */ 189 Vec vec1_C,vec2_C,vec1_V,vec2_V; 190 /* additional working stuff */ 191 IS is_aux; 192 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 193 const PetscScalar *array,*row_cmat_values; 194 const PetscInt *row_cmat_indices,*idx_R_local; 195 PetscInt *idx_V_B,*auxindices; 196 PetscInt n_vertices,n_constraints,size_of_constraint; 197 PetscInt i,j,n_R,n_D,n_B; 198 PetscBool unsymmetric_check; 199 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 200 MatType impMatType; 201 /* some shortcuts to scalars */ 202 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 203 /* for debugging purposes */ 204 PetscReal *coarsefunctions_errors,*constraints_errors; 205 206 PetscFunctionBegin; 207 /* get number of vertices (corners plus constraints with change of basis) 208 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 209 n_vertices = pcbddc->n_actual_vertices; 210 n_constraints = pcbddc->local_primal_size-n_vertices; 211 /* Set Non-overlapping dimensions */ 212 n_B = pcis->n_B; n_D = pcis->n - n_B; 213 n_R = pcis->n-n_vertices; 214 215 /* Set types for local objects needed by BDDC precondtioner */ 216 impMatType = MATSEQDENSE; 217 218 /* Allocating some extra storage just to be safe */ 219 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 220 for (i=0;i<pcis->n;i++) auxindices[i]=i; 221 222 /* vertices in boundary numbering */ 223 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 224 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 225 if (i != n_vertices) { 226 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 227 } 228 229 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 230 if (n_constraints) { 231 /* see if we can save some allocations */ 232 if (pcbddc->local_auxmat2) { 233 PetscInt on_R,on_constraints; 234 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 235 if (on_R != n_R || on_constraints != n_constraints) { 236 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 237 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 238 } 239 } 240 /* work vectors */ 241 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 242 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 243 /* auxiliary matrices */ 244 if (!pcbddc->local_auxmat2) { 245 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 246 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 247 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 249 } 250 251 /* Extract constraints on R nodes: C_{CR} */ 252 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 253 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 254 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 255 256 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 257 for (i=0;i<n_constraints;i++) { 258 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 259 /* Get row of constraint matrix in R numbering */ 260 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 262 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 263 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 264 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 265 /* Solve for row of constraint matrix in R numbering */ 266 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 267 /* Set values in local_auxmat2 */ 268 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 270 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 271 } 272 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 275 276 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 277 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 278 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 279 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 280 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 281 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 282 ierr = MatSetUp(M1);CHKERRQ(ierr); 283 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 284 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 285 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 286 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 287 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 288 ierr = MatDestroy(&M2);CHKERRQ(ierr); 289 ierr = MatDestroy(&M3);CHKERRQ(ierr); 290 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 291 if (!pcbddc->local_auxmat1) { 292 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } else { 294 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 295 } 296 } 297 298 /* Get submatrices from subdomain matrix */ 299 if (n_vertices) { 300 PetscInt ibs,mbs; 301 PetscBool issbaij; 302 Mat newmat; 303 304 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 305 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 306 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 307 if (ibs != mbs) { /* need to convert to SEQAIJ */ 308 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 310 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 311 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 313 } else { 314 /* this is safe */ 315 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 316 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 317 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 318 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 319 /* which of the two approaches is faster? */ 320 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 322 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 324 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 325 } else { 326 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 327 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 328 } 329 } 330 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 331 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 332 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 333 } 334 335 /* Matrix of coarse basis functions (local) */ 336 if (pcbddc->coarse_phi_B) { 337 PetscInt on_B,on_primal; 338 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 339 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 340 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 341 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 342 } 343 } 344 if (pcbddc->coarse_phi_D) { 345 PetscInt on_D,on_primal; 346 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 347 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 348 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 349 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 350 } 351 } 352 if (!pcbddc->coarse_phi_B) { 353 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 355 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 356 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 357 } 358 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 359 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 361 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 362 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 363 } 364 365 if (pcbddc->dbg_flag) { 366 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 367 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 368 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 369 } 370 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 371 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 372 373 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 374 375 /* vertices */ 376 for (i=0;i<n_vertices;i++) { 377 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 378 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 379 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 380 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 381 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 382 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 383 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 384 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 387 if (n_constraints) { 388 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 389 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 390 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 391 } 392 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 393 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 394 395 /* Set values in coarse basis function and subdomain part of coarse_mat */ 396 /* coarse basis functions */ 397 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 398 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 399 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 402 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 403 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 404 if (pcbddc->switch_static || pcbddc->dbg_flag) { 405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 } 411 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 412 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 415 if (n_constraints) { 416 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 419 } 420 421 /* check */ 422 if (pcbddc->dbg_flag) { 423 /* assemble subdomain vector on local nodes */ 424 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 425 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 428 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 429 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 430 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 431 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 432 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 433 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 if (n_constraints) { 437 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 438 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 439 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 } 441 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 442 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 443 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 444 /* check saddle point solution */ 445 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 446 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 447 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 448 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 449 /* shift by the identity matrix */ 450 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 451 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 453 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 454 } 455 } 456 457 /* constraints */ 458 for (i=0;i<n_constraints;i++) { 459 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 460 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 461 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 462 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 463 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 464 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 465 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 466 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 467 if (n_vertices) { 468 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 469 } 470 /* Set values in coarse basis function and subdomain part of coarse_mat */ 471 /* coarse basis functions */ 472 j = i+n_vertices; /* don't touch this! */ 473 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 474 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 475 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 477 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 478 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 481 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 482 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 483 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 484 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 485 } 486 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 487 if (n_vertices) { 488 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 491 } 492 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 493 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 494 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 495 496 if (pcbddc->dbg_flag) { 497 /* assemble subdomain vector on nodes */ 498 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 499 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 500 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 501 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 502 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 503 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 504 /* assemble subdomain vector of lagrange multipliers */ 505 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 506 if (n_vertices) { 507 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 508 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 510 } 511 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 512 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 514 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 515 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 516 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 517 /* check saddle point solution */ 518 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 519 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 520 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 521 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 522 /* shift by the identity matrix */ 523 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 524 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 525 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 526 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 527 } 528 } 529 /* call assembling routines for local coarse basis */ 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 if (pcbddc->switch_static || pcbddc->dbg_flag) { 533 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 534 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 535 } 536 537 /* compute other basis functions for non-symmetric problems */ 538 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 539 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 540 PetscBool setsym; 541 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 542 if (!setsym) pcbddc->issym = PETSC_FALSE; 543 } 544 545 if (!pcbddc->issym) { 546 if (!pcbddc->coarse_psi_B) { 547 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 548 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 549 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 550 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 551 } 552 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 553 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 554 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 555 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 556 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 557 } 558 for (i=0;i<pcbddc->local_primal_size;i++) { 559 if (n_constraints) { 560 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 561 for (j=0;j<n_constraints;j++) { 562 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 563 } 564 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 565 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 566 } 567 if (i<n_vertices) { 568 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 569 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 570 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 571 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 572 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 573 if (n_constraints) { 574 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 575 } 576 } else { 577 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 578 } 579 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 580 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 581 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 583 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 584 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 585 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 586 if (i<n_vertices) { 587 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 588 } 589 if (pcbddc->switch_static || pcbddc->dbg_flag) { 590 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 591 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 592 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 593 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 594 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 595 } 596 597 if (pcbddc->dbg_flag) { 598 /* assemble subdomain vector on nodes */ 599 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 600 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 601 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 602 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 603 if (i<n_vertices) { 604 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 605 } 606 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 607 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 608 /* assemble subdomain vector of lagrange multipliers */ 609 for (j=0;j<pcbddc->local_primal_size;j++) { 610 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 611 } 612 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 614 /* check saddle point solution */ 615 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 616 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 617 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 618 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 619 /* shift by the identity matrix */ 620 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 621 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 622 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 623 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 624 } 625 } 626 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 627 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 628 if (pcbddc->switch_static || pcbddc->dbg_flag) { 629 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 630 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 631 } 632 unsymmetric_check = PETSC_TRUE; 633 } else { /* take references to already computed coarse basis */ 634 unsymmetric_check = PETSC_FALSE; 635 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 636 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 637 if (pcbddc->coarse_phi_D) { 638 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 639 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 640 } 641 } 642 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 643 /* Checking coarse_sub_mat and coarse basis functios */ 644 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 645 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 646 if (pcbddc->dbg_flag) { 647 Mat coarse_sub_mat; 648 Mat AUXMAT,TM1,TM2,TM3,TM4; 649 Mat coarse_phi_D,coarse_phi_B; 650 Mat coarse_psi_D,coarse_psi_B; 651 Mat A_II,A_BB,A_IB,A_BI; 652 MatType checkmattype=MATSEQAIJ; 653 PetscReal real_value; 654 655 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 656 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 657 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 658 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 659 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 660 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 661 if (unsymmetric_check) { 662 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 663 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 664 } 665 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 666 667 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 668 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 669 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 670 if (unsymmetric_check) { 671 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 675 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 676 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 677 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 } else { 684 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 685 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 686 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 687 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 688 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 689 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 690 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 691 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 692 } 693 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 694 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 695 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 696 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 697 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 698 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 699 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 704 for (i=0;i<pcbddc->local_primal_size;i++) { 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 706 } 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 708 for (i=0;i<pcbddc->local_primal_size;i++) { 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 710 } 711 if (unsymmetric_check) { 712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 713 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 715 } 716 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 717 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 719 } 720 } 721 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 722 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 723 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 724 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 725 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 726 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 727 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 728 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 729 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 730 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 731 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 732 if (unsymmetric_check) { 733 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 734 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 735 } 736 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 737 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 738 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 739 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 740 } 741 /* free memory */ 742 if (n_vertices) { 743 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 744 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 745 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 746 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 747 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 748 } 749 if (n_constraints) { 750 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 751 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 752 ierr = MatDestroy(&M1);CHKERRQ(ierr); 753 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 754 } 755 ierr = PetscFree(auxindices);CHKERRQ(ierr); 756 /* get back data */ 757 *coarse_submat_vals_n = coarse_submat_vals; 758 PetscFunctionReturn(0); 759 } 760 761 #undef __FUNCT__ 762 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 763 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 764 { 765 PC_IS* pcis = (PC_IS*)(pc->data); 766 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 767 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 768 PetscBool issbaij,isseqaij; 769 /* manage repeated solves */ 770 MatReuse reuse; 771 PetscErrorCode ierr; 772 773 PetscFunctionBegin; 774 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 775 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 776 } 777 /* get mat flags */ 778 reuse = MAT_INITIAL_MATRIX; 779 if (pc->setupcalled) { 780 if (pc->flag == SAME_NONZERO_PATTERN) { 781 reuse = MAT_REUSE_MATRIX; 782 } else { 783 reuse = MAT_INITIAL_MATRIX; 784 } 785 } 786 if (reuse == MAT_INITIAL_MATRIX) { 787 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 788 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 789 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 790 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 791 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 792 } 793 794 /* transform local matrices if needed */ 795 if (pcbddc->ChangeOfBasisMatrix) { 796 Mat *change_mat_all; 797 IS is_local,is_global; 798 PetscBool sorted; 799 PetscInt *idxs_perm; 800 801 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr); 802 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 803 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 804 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 805 if (!sorted) { 806 const PetscInt *idxs; 807 PetscInt *idxs_sorted,i; 808 809 ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr); 810 ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr); 811 for (i=0;i<pcis->n;i++) { 812 idxs_perm[i] = i; 813 } 814 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 815 ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr); 816 for (i=0;i<pcis->n;i++) { 817 idxs_sorted[i] = idxs[idxs_perm[i]]; 818 } 819 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 820 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 821 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 822 } 823 824 /* get change of basis on the whole set of local dofs */ 825 ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 826 827 if (!sorted) { 828 Mat new_mat; 829 IS is_perm; 830 PetscInt *idxs,i; 831 832 ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr); 833 for (i=0;i<pcis->n;i++) { 834 idxs[idxs_perm[i]] = i; 835 } 836 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 837 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 838 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 839 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 840 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 841 change_mat_all[0] = new_mat; 842 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 843 } 844 845 /* check */ 846 if (pcbddc->dbg_flag) { 847 Vec x,x_change; 848 PetscReal error; 849 850 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 851 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 852 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 853 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 854 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 855 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856 ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 857 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 858 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 859 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 860 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 861 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 862 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 863 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 864 ierr = VecDestroy(&x);CHKERRQ(ierr); 865 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 866 } 867 868 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 869 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 870 if (isseqaij) { 871 ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 872 } else { 873 Mat work_mat; 874 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 875 ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 876 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 877 } 878 /* 879 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 880 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 881 */ 882 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 883 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 884 } else { 885 /* without change of basis, the local matrix is unchanged */ 886 if (!pcbddc->local_mat) { 887 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 888 pcbddc->local_mat = matis->A; 889 } 890 } 891 892 /* get submatrices */ 893 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 894 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 895 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 896 if (!issbaij) { 897 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 898 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 899 } else { 900 Mat newmat; 901 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 902 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 903 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 904 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 905 } 906 PetscFunctionReturn(0); 907 } 908 909 #undef __FUNCT__ 910 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 911 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 912 { 913 PC_IS* pcis = (PC_IS*)(pc->data); 914 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 915 IS is_aux1,is_aux2; 916 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 917 PetscInt n_vertices,i,j,n_R,n_D,n_B; 918 PetscInt vbs,bs; 919 PetscBT bitmask; 920 PetscErrorCode ierr; 921 922 PetscFunctionBegin; 923 /* 924 No need to setup local scatters if 925 - primal space is unchanged 926 AND 927 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 928 AND 929 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 930 */ 931 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 932 PetscFunctionReturn(0); 933 } 934 /* destroy old objects */ 935 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 936 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 937 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 938 /* Set Non-overlapping dimensions */ 939 n_B = pcis->n_B; n_D = pcis->n - n_B; 940 n_vertices = pcbddc->n_actual_vertices; 941 /* create auxiliary bitmask */ 942 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 943 for (i=0;i<n_vertices;i++) { 944 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 945 } 946 947 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 948 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 949 for (i=0, n_R=0; i<pcis->n; i++) { 950 if (!PetscBTLookup(bitmask,i)) { 951 idx_R_local[n_R] = i; 952 n_R++; 953 } 954 } 955 956 /* Block code */ 957 vbs = 1; 958 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 959 if (bs>1 && !(n_vertices%bs)) { 960 PetscBool is_blocked = PETSC_TRUE; 961 PetscInt *vary; 962 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 963 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 964 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 965 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 966 for (i=0; i<n_vertices; i++) { 967 if (vary[i]!=0 && vary[i]!=bs) { 968 is_blocked = PETSC_FALSE; 969 break; 970 } 971 } 972 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 973 vbs = bs; 974 for (i=0;i<n_R/vbs;i++) { 975 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 976 } 977 } 978 ierr = PetscFree(vary);CHKERRQ(ierr); 979 } 980 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 981 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 982 983 /* print some info if requested */ 984 if (pcbddc->dbg_flag) { 985 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 986 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 987 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 989 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 990 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 993 } 994 995 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 996 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 997 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 998 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 999 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1000 for (i=0; i<n_D; i++) { 1001 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1002 } 1003 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1004 for (i=0, j=0; i<n_R; i++) { 1005 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1006 aux_array1[j++] = i; 1007 } 1008 } 1009 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1010 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1011 for (i=0, j=0; i<n_B; i++) { 1012 if (!PetscBTLookup(bitmask,is_indices[i])) { 1013 aux_array2[j++] = i; 1014 } 1015 } 1016 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1017 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1018 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1019 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1020 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1021 1022 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1023 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1024 for (i=0, j=0; i<n_R; i++) { 1025 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1026 aux_array1[j++] = i; 1027 } 1028 } 1029 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1030 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1031 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1032 } 1033 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1034 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1035 PetscFunctionReturn(0); 1036 } 1037 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1041 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1042 { 1043 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1044 PC_IS *pcis = (PC_IS*)pc->data; 1045 PC pc_temp; 1046 Mat A_RR; 1047 MatReuse reuse; 1048 PetscScalar m_one = -1.0; 1049 PetscReal value; 1050 PetscInt n_D,n_R,ibs,mbs; 1051 PetscBool use_exact,use_exact_reduced,issbaij; 1052 PetscErrorCode ierr; 1053 /* prefixes stuff */ 1054 char dir_prefix[256],neu_prefix[256],str_level[16]; 1055 size_t len; 1056 1057 PetscFunctionBegin; 1058 1059 /* compute prefixes */ 1060 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1061 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1062 if (!pcbddc->current_level) { 1063 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1064 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1065 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1066 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1067 } else { 1068 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1069 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1070 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1071 len -= 15; /* remove "pc_bddc_coarse_" */ 1072 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1073 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1074 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1075 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1076 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1077 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1078 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1079 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1080 } 1081 1082 /* DIRICHLET PROBLEM */ 1083 /* Matrix for Dirichlet problem is pcis->A_II */ 1084 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1085 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1086 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1087 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1088 /* default */ 1089 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1090 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1091 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1092 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1093 if (issbaij) { 1094 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1095 } else { 1096 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1097 } 1098 /* Allow user's customization */ 1099 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1100 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1101 } 1102 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1103 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1104 if (!n_D) { 1105 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1106 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1107 } 1108 /* Set Up KSP for Dirichlet problem of BDDC */ 1109 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1110 /* set ksp_D into pcis data */ 1111 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1112 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1113 pcis->ksp_D = pcbddc->ksp_D; 1114 1115 /* NEUMANN PROBLEM */ 1116 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1117 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1118 if (pcbddc->ksp_R) { /* already created ksp */ 1119 PetscInt nn_R; 1120 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1121 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1122 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1123 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1124 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1125 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1126 reuse = MAT_INITIAL_MATRIX; 1127 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1128 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1129 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1130 reuse = MAT_INITIAL_MATRIX; 1131 } else { /* safe to reuse the matrix */ 1132 reuse = MAT_REUSE_MATRIX; 1133 } 1134 } 1135 /* last check */ 1136 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1137 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1138 reuse = MAT_INITIAL_MATRIX; 1139 } 1140 } else { /* first time, so we need to create the matrix */ 1141 reuse = MAT_INITIAL_MATRIX; 1142 } 1143 /* extract A_RR */ 1144 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1145 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1146 if (ibs != mbs) { 1147 Mat newmat; 1148 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1149 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1150 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1151 } else { 1152 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1153 } 1154 if (!pcbddc->ksp_R) { /* create object if not present */ 1155 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1156 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1157 /* default */ 1158 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1159 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1160 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1161 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1162 if (issbaij) { 1163 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1164 } else { 1165 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1166 } 1167 /* Allow user's customization */ 1168 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1169 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1170 } 1171 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1172 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1173 if (!n_R) { 1174 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1175 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1176 } 1177 /* Set Up KSP for Neumann problem of BDDC */ 1178 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1179 1180 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1181 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1182 /* Dirichlet */ 1183 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1184 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1185 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1186 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1187 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1188 /* need to be adapted? */ 1189 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1190 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1191 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1192 /* print info */ 1193 if (pcbddc->dbg_flag) { 1194 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1195 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1196 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1197 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1198 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1199 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1200 } 1201 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1202 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1203 } 1204 1205 /* Neumann */ 1206 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1207 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1208 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1209 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1210 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1211 /* need to be adapted? */ 1212 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1213 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1214 /* print info */ 1215 if (pcbddc->dbg_flag) { 1216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1217 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1218 } 1219 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1220 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1221 } 1222 } 1223 /* free Neumann problem's matrix */ 1224 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1230 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1231 { 1232 PetscErrorCode ierr; 1233 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1234 1235 PetscFunctionBegin; 1236 if (applytranspose) { 1237 if (pcbddc->local_auxmat1) { 1238 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1239 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1240 } 1241 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1242 } else { 1243 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1244 if (pcbddc->local_auxmat1) { 1245 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1246 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1247 } 1248 } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1255 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1256 { 1257 PetscErrorCode ierr; 1258 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1259 PC_IS* pcis = (PC_IS*) (pc->data); 1260 const PetscScalar zero = 0.0; 1261 1262 PetscFunctionBegin; 1263 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1264 if (applytranspose) { 1265 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1266 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1267 } else { 1268 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1269 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1270 } 1271 /* start communications from local primal nodes to rhs of coarse solver */ 1272 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1273 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1274 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1275 1276 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1277 /* TODO remove null space when doing multilevel */ 1278 if (pcbddc->coarse_ksp) { 1279 if (applytranspose) { 1280 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1281 } else { 1282 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1283 } 1284 } 1285 /* start communications from coarse solver solution to local primal nodes */ 1286 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1287 1288 /* Local solution on R nodes */ 1289 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1290 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1291 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1292 if (pcbddc->switch_static) { 1293 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1294 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1295 } 1296 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1297 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1298 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1299 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1300 if (pcbddc->switch_static) { 1301 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1302 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1303 } 1304 1305 /* complete communications from coarse sol to local primal nodes */ 1306 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1307 1308 /* Sum contributions from two levels */ 1309 if (applytranspose) { 1310 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1311 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1312 } else { 1313 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1314 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1315 } 1316 PetscFunctionReturn(0); 1317 } 1318 1319 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1322 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1323 { 1324 PetscErrorCode ierr; 1325 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1326 PetscScalar *array,*array2; 1327 Vec from,to; 1328 1329 PetscFunctionBegin; 1330 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1331 from = pcbddc->coarse_vec; 1332 to = pcbddc->vec1_P; 1333 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1334 Vec tvec; 1335 PetscInt lsize; 1336 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1337 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1338 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1339 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1340 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1341 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1342 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1343 } 1344 } else { /* from local to global -> put data in coarse right hand side */ 1345 from = pcbddc->vec1_P; 1346 to = pcbddc->coarse_vec; 1347 } 1348 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1354 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1355 { 1356 PetscErrorCode ierr; 1357 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1358 PetscScalar *array,*array2; 1359 Vec from,to; 1360 1361 PetscFunctionBegin; 1362 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1363 from = pcbddc->coarse_vec; 1364 to = pcbddc->vec1_P; 1365 } else { /* from local to global -> put data in coarse right hand side */ 1366 from = pcbddc->vec1_P; 1367 to = pcbddc->coarse_vec; 1368 } 1369 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1370 if (smode == SCATTER_FORWARD) { 1371 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1372 Vec tvec; 1373 PetscInt lsize; 1374 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1375 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1376 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1377 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1378 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1379 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1380 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1381 } 1382 } 1383 PetscFunctionReturn(0); 1384 } 1385 1386 /* uncomment for testing purposes */ 1387 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1390 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1391 { 1392 PetscErrorCode ierr; 1393 PC_IS* pcis = (PC_IS*)(pc->data); 1394 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1395 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1396 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1397 MatType impMatType=MATSEQAIJ; 1398 /* one and zero */ 1399 PetscScalar one=1.0,zero=0.0; 1400 /* space to store constraints and their local indices */ 1401 PetscScalar *temp_quadrature_constraint; 1402 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1403 /* iterators */ 1404 PetscInt i,j,k,total_counts,temp_start_ptr; 1405 /* stuff to store connected components stored in pcbddc->mat_graph */ 1406 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1407 PetscInt n_ISForFaces,n_ISForEdges; 1408 /* near null space stuff */ 1409 MatNullSpace nearnullsp; 1410 const Vec *nearnullvecs; 1411 Vec *localnearnullsp; 1412 PetscBool nnsp_has_cnst; 1413 PetscInt nnsp_size; 1414 PetscScalar *array; 1415 /* BLAS integers */ 1416 PetscBLASInt lwork,lierr; 1417 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1418 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1419 /* LAPACK working arrays for SVD or POD */ 1420 PetscBool skip_lapack; 1421 PetscScalar *work; 1422 PetscReal *singular_vals; 1423 #if defined(PETSC_USE_COMPLEX) 1424 PetscReal *rwork; 1425 #endif 1426 #if defined(PETSC_MISSING_LAPACK_GESVD) 1427 PetscBLASInt Blas_one_2=1; 1428 PetscScalar *temp_basis,*correlation_mat; 1429 #else 1430 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1431 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1432 #endif 1433 /* reuse */ 1434 PetscInt olocal_primal_size; 1435 PetscInt *oprimal_indices_local_idxs; 1436 /* change of basis */ 1437 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1438 PetscBool boolforchange,qr_needed; 1439 PetscBT touched,change_basis,qr_needed_idx; 1440 /* auxiliary stuff */ 1441 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1442 /* some quantities */ 1443 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1444 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1445 1446 1447 PetscFunctionBegin; 1448 /* Destroy Mat objects computed previously */ 1449 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1450 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1451 /* Get index sets for faces, edges and vertices from graph */ 1452 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1453 /* free unneeded index sets */ 1454 if (!pcbddc->use_vertices) { 1455 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1456 } 1457 if (!pcbddc->use_edges) { 1458 for (i=0;i<n_ISForEdges;i++) { 1459 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1460 } 1461 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1462 n_ISForEdges = 0; 1463 } 1464 if (!pcbddc->use_faces) { 1465 for (i=0;i<n_ISForFaces;i++) { 1466 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1467 } 1468 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1469 n_ISForFaces = 0; 1470 } 1471 /* HACKS (the following two blocks of code) */ 1472 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1473 pcbddc->use_change_of_basis = PETSC_TRUE; 1474 if (!ISForEdges) { 1475 pcbddc->use_change_on_faces = PETSC_TRUE; 1476 } 1477 } 1478 if (pcbddc->NullSpace) { 1479 /* use_change_of_basis should be consistent among processors */ 1480 PetscBool tbool[2],gbool[2]; 1481 tbool [0] = pcbddc->use_change_of_basis; 1482 tbool [1] = pcbddc->use_change_on_faces; 1483 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1484 pcbddc->use_change_of_basis = gbool[0]; 1485 pcbddc->use_change_on_faces = gbool[1]; 1486 } 1487 /* print some info */ 1488 if (pcbddc->dbg_flag) { 1489 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1490 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1491 i = 0; 1492 if (ISForVertices) { 1493 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1494 } 1495 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1497 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1498 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1499 } 1500 /* check if near null space is attached to global mat */ 1501 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1502 if (nearnullsp) { 1503 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1504 /* remove any stored info */ 1505 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1506 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1507 /* store information for BDDC solver reuse */ 1508 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1509 pcbddc->onearnullspace = nearnullsp; 1510 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1511 for (i=0;i<nnsp_size;i++) { 1512 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1513 } 1514 } else { /* if near null space is not provided BDDC uses constants by default */ 1515 nnsp_size = 0; 1516 nnsp_has_cnst = PETSC_TRUE; 1517 } 1518 /* get max number of constraints on a single cc */ 1519 max_constraints = nnsp_size; 1520 if (nnsp_has_cnst) max_constraints++; 1521 1522 /* 1523 Evaluate maximum storage size needed by the procedure 1524 - temp_indices will contain start index of each constraint stored as follows 1525 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1526 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1527 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1528 */ 1529 total_counts = n_ISForFaces+n_ISForEdges; 1530 total_counts *= max_constraints; 1531 n_vertices = 0; 1532 if (ISForVertices) { 1533 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1534 } 1535 total_counts += n_vertices; 1536 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1537 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1538 total_counts = 0; 1539 max_size_of_constraint = 0; 1540 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1541 if (i<n_ISForEdges) { 1542 used_IS = &ISForEdges[i]; 1543 } else { 1544 used_IS = &ISForFaces[i-n_ISForEdges]; 1545 } 1546 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1547 total_counts += j; 1548 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1549 } 1550 total_counts *= max_constraints; 1551 total_counts += n_vertices; 1552 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1553 /* get local part of global near null space vectors */ 1554 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1555 for (k=0;k<nnsp_size;k++) { 1556 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1557 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1559 } 1560 1561 /* whether or not to skip lapack calls */ 1562 skip_lapack = PETSC_TRUE; 1563 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1564 1565 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1566 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1567 PetscScalar temp_work; 1568 #if defined(PETSC_MISSING_LAPACK_GESVD) 1569 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1570 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1571 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1572 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1573 #if defined(PETSC_USE_COMPLEX) 1574 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1575 #endif 1576 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1577 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1578 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1579 lwork = -1; 1580 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1581 #if !defined(PETSC_USE_COMPLEX) 1582 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1583 #else 1584 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1585 #endif 1586 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1587 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1588 #else /* on missing GESVD */ 1589 /* SVD */ 1590 PetscInt max_n,min_n; 1591 max_n = max_size_of_constraint; 1592 min_n = max_constraints; 1593 if (max_size_of_constraint < max_constraints) { 1594 min_n = max_size_of_constraint; 1595 max_n = max_constraints; 1596 } 1597 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1598 #if defined(PETSC_USE_COMPLEX) 1599 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1600 #endif 1601 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1602 lwork = -1; 1603 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1604 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1605 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1606 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1607 #if !defined(PETSC_USE_COMPLEX) 1608 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1609 #else 1610 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1611 #endif 1612 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1613 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1614 #endif /* on missing GESVD */ 1615 /* Allocate optimal workspace */ 1616 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1617 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1618 } 1619 /* Now we can loop on constraining sets */ 1620 total_counts = 0; 1621 temp_indices[0] = 0; 1622 /* vertices */ 1623 if (ISForVertices) { 1624 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1625 if (nnsp_has_cnst) { /* consider all vertices */ 1626 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1627 for (i=0;i<n_vertices;i++) { 1628 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1629 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1630 total_counts++; 1631 } 1632 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1633 PetscBool used_vertex; 1634 for (i=0;i<n_vertices;i++) { 1635 used_vertex = PETSC_FALSE; 1636 k = 0; 1637 while (!used_vertex && k<nnsp_size) { 1638 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1639 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1640 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1641 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1642 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1643 total_counts++; 1644 used_vertex = PETSC_TRUE; 1645 } 1646 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1647 k++; 1648 } 1649 } 1650 } 1651 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1652 n_vertices = total_counts; 1653 } 1654 1655 /* edges and faces */ 1656 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1657 if (i<n_ISForEdges) { 1658 used_IS = &ISForEdges[i]; 1659 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1660 } else { 1661 used_IS = &ISForFaces[i-n_ISForEdges]; 1662 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1663 } 1664 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1665 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1666 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1667 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1668 /* change of basis should not be performed on local periodic nodes */ 1669 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1670 if (nnsp_has_cnst) { 1671 PetscScalar quad_value; 1672 temp_constraints++; 1673 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1674 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1675 for (j=0;j<size_of_constraint;j++) { 1676 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1677 } 1678 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1679 total_counts++; 1680 } 1681 for (k=0;k<nnsp_size;k++) { 1682 PetscReal real_value; 1683 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1684 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1685 for (j=0;j<size_of_constraint;j++) { 1686 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1687 } 1688 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1689 /* check if array is null on the connected component */ 1690 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1691 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1692 if (real_value > 0.0) { /* keep indices and values */ 1693 temp_constraints++; 1694 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1695 total_counts++; 1696 } 1697 } 1698 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1699 valid_constraints = temp_constraints; 1700 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1701 if (!pcbddc->use_nnsp_true && temp_constraints) { 1702 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1703 1704 #if defined(PETSC_MISSING_LAPACK_GESVD) 1705 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1706 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1707 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1708 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1709 from that computed using LAPACKgesvd 1710 -> This is due to a different computation of eigenvectors in LAPACKheev 1711 -> The quality of the POD-computed basis will be the same */ 1712 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1713 /* Store upper triangular part of correlation matrix */ 1714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1715 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1716 for (j=0;j<temp_constraints;j++) { 1717 for (k=0;k<j+1;k++) { 1718 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1719 } 1720 } 1721 /* compute eigenvalues and eigenvectors of correlation matrix */ 1722 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1723 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1724 #if !defined(PETSC_USE_COMPLEX) 1725 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1726 #else 1727 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1728 #endif 1729 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1730 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1731 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1732 j=0; 1733 while (j < temp_constraints && singular_vals[j] < tol) j++; 1734 total_counts=total_counts-j; 1735 valid_constraints = temp_constraints-j; 1736 /* scale and copy POD basis into used quadrature memory */ 1737 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1738 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1739 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1740 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1741 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1742 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1743 if (j<temp_constraints) { 1744 PetscInt ii; 1745 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1746 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1747 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1748 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1749 for (k=0;k<temp_constraints-j;k++) { 1750 for (ii=0;ii<size_of_constraint;ii++) { 1751 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1752 } 1753 } 1754 } 1755 #else /* on missing GESVD */ 1756 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1757 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1758 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1759 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1760 #if !defined(PETSC_USE_COMPLEX) 1761 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1762 #else 1763 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1764 #endif 1765 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1766 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1767 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1768 k = temp_constraints; 1769 if (k > size_of_constraint) k = size_of_constraint; 1770 j = 0; 1771 while (j < k && singular_vals[k-j-1] < tol) j++; 1772 total_counts = total_counts-temp_constraints+k-j; 1773 valid_constraints = k-j; 1774 #endif /* on missing GESVD */ 1775 } 1776 /* setting change_of_basis flag is safe now */ 1777 if (boolforchange) { 1778 for (j=0;j<valid_constraints;j++) { 1779 PetscBTSet(change_basis,total_counts-j-1); 1780 } 1781 } 1782 } 1783 /* free index sets of faces, edges and vertices */ 1784 for (i=0;i<n_ISForFaces;i++) { 1785 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1786 } 1787 if (n_ISForFaces) { 1788 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1789 } 1790 for (i=0;i<n_ISForEdges;i++) { 1791 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1792 } 1793 if (n_ISForEdges) { 1794 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1795 } 1796 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1797 /* map temp_indices_to_constraint in boundary numbering */ 1798 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1799 if (i != temp_indices[total_counts]) { 1800 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1801 } 1802 1803 /* free workspace */ 1804 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1805 ierr = PetscFree(work);CHKERRQ(ierr); 1806 #if defined(PETSC_USE_COMPLEX) 1807 ierr = PetscFree(rwork);CHKERRQ(ierr); 1808 #endif 1809 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1810 #if defined(PETSC_MISSING_LAPACK_GESVD) 1811 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1812 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1813 #endif 1814 } 1815 for (k=0;k<nnsp_size;k++) { 1816 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1817 } 1818 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1819 1820 /* set quantities in pcbddc data structure and store previous primal size */ 1821 /* n_vertices defines the number of subdomain corners in the primal space */ 1822 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1823 olocal_primal_size = pcbddc->local_primal_size; 1824 pcbddc->local_primal_size = total_counts; 1825 pcbddc->n_vertices = n_vertices; 1826 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1827 1828 /* Create constraint matrix */ 1829 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1830 /* so we need to set it up properly either with or without change of basis */ 1831 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1832 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1833 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1834 /* array to compute a local numbering of constraints : vertices first then constraints */ 1835 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1836 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1837 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1838 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1839 /* auxiliary stuff for basis change */ 1840 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1841 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1842 1843 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1844 total_primal_vertices=0; 1845 for (i=0;i<pcbddc->local_primal_size;i++) { 1846 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1847 if (size_of_constraint == 1) { 1848 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1849 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1850 aux_primal_minloc[total_primal_vertices]=0; 1851 total_primal_vertices++; 1852 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1853 PetscInt min_loc,min_index; 1854 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1855 /* find first untouched local node */ 1856 k = 0; 1857 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1858 min_index = global_indices[k]; 1859 min_loc = k; 1860 /* search the minimum among global nodes already untouched on the cc */ 1861 for (k=1;k<size_of_constraint;k++) { 1862 /* there can be more than one constraint on a single connected component */ 1863 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1864 min_index = global_indices[k]; 1865 min_loc = k; 1866 } 1867 } 1868 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1869 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1870 aux_primal_minloc[total_primal_vertices]=min_loc; 1871 total_primal_vertices++; 1872 } 1873 } 1874 /* determine if a QR strategy is needed for change of basis */ 1875 qr_needed = PETSC_FALSE; 1876 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1877 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1878 if (PetscBTLookup(change_basis,i)) { 1879 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1880 j = 0; 1881 for (k=0;k<size_of_constraint;k++) { 1882 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1883 j++; 1884 } 1885 } 1886 /* found more than one primal dof on the cc */ 1887 if (j > 1) { 1888 PetscBTSet(qr_needed_idx,i); 1889 qr_needed = PETSC_TRUE; 1890 } 1891 } 1892 } 1893 /* free workspace */ 1894 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1895 1896 /* permute indices in order to have a sorted set of vertices */ 1897 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1898 1899 /* nonzero structure of constraint matrix */ 1900 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1901 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1902 j=total_primal_vertices; 1903 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1904 if (!PetscBTLookup(change_basis,i)) { 1905 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1906 j++; 1907 } 1908 } 1909 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1910 ierr = PetscFree(nnz);CHKERRQ(ierr); 1911 /* set values in constraint matrix */ 1912 for (i=0;i<total_primal_vertices;i++) { 1913 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1914 } 1915 total_counts = total_primal_vertices; 1916 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1917 if (!PetscBTLookup(change_basis,i)) { 1918 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1919 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1920 total_counts++; 1921 } 1922 } 1923 /* assembling */ 1924 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1925 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1926 /* 1927 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1928 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1929 */ 1930 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1931 if (pcbddc->use_change_of_basis) { 1932 /* dual and primal dofs on a single cc */ 1933 PetscInt dual_dofs,primal_dofs; 1934 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1935 PetscInt primal_counter; 1936 /* working stuff for GEQRF */ 1937 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1938 PetscBLASInt lqr_work; 1939 /* working stuff for UNGQR */ 1940 PetscScalar *gqr_work,lgqr_work_t; 1941 PetscBLASInt lgqr_work; 1942 /* working stuff for TRTRS */ 1943 PetscScalar *trs_rhs; 1944 PetscBLASInt Blas_NRHS; 1945 /* pointers for values insertion into change of basis matrix */ 1946 PetscInt *start_rows,*start_cols; 1947 PetscScalar *start_vals; 1948 /* working stuff for values insertion */ 1949 PetscBT is_primal; 1950 /* matrix sizes */ 1951 PetscInt global_size,local_size; 1952 /* work array for nonzeros */ 1953 PetscScalar *nnz_array; 1954 /* temporary change of basis */ 1955 Mat localChangeOfBasisMatrix; 1956 /* auxiliary work for global change of basis */ 1957 Vec nnz_vec; 1958 PetscInt *idxs_I,*idxs_B,*idxs_all; 1959 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 1960 PetscScalar *vals; 1961 PetscBool done; 1962 1963 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 1964 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 1965 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1966 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1967 1968 /* nonzeros for local mat */ 1969 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1970 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1971 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1972 if (PetscBTLookup(change_basis,i)) { 1973 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1974 if (PetscBTLookup(qr_needed_idx,i)) { 1975 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1976 } else { 1977 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1978 /* get local primal index on the cc */ 1979 j = 0; 1980 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1981 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1982 } 1983 } 1984 } 1985 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1986 /* Set initial identity in the matrix */ 1987 for (i=0;i<pcis->n_B;i++) { 1988 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1989 } 1990 1991 if (pcbddc->dbg_flag) { 1992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1994 } 1995 1996 1997 /* Now we loop on the constraints which need a change of basis */ 1998 /* 1999 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2000 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2001 2002 Basic blocks of change of basis matrix T computed 2003 2004 - Using the following block transformation if there is only a primal dof on the cc 2005 (in the example, primal dof is the last one of the edge in LOCAL ordering 2006 in this code, primal dof is the first one of the edge in GLOBAL ordering) 2007 | 1 0 ... 0 1 | 2008 | 0 1 ... 0 1 | 2009 | ... | 2010 | 0 ... 1 1 | 2011 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 2012 2013 - via QR decomposition of constraints otherwise 2014 */ 2015 if (qr_needed) { 2016 /* space to store Q */ 2017 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 2018 /* first we issue queries for optimal work */ 2019 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2020 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2021 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2022 lqr_work = -1; 2023 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2024 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2025 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2026 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2027 lgqr_work = -1; 2028 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2029 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2030 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2031 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2032 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2033 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2034 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2035 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2036 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2037 /* array to store scaling factors for reflectors */ 2038 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2039 /* array to store rhs and solution of triangular solver */ 2040 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2041 /* allocating workspace for check */ 2042 if (pcbddc->dbg_flag) { 2043 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2044 } 2045 } 2046 /* array to store whether a node is primal or not */ 2047 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2048 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2049 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2050 if (i != total_primal_vertices) { 2051 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2052 } 2053 for (i=0;i<total_primal_vertices;i++) { 2054 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2055 } 2056 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2057 2058 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2059 /* -> using implicit ordering contained in temp_indices data */ 2060 total_counts = pcbddc->n_vertices; 2061 primal_counter = total_counts; 2062 while (total_counts<pcbddc->local_primal_size) { 2063 primal_dofs = 1; 2064 if (PetscBTLookup(change_basis,total_counts)) { 2065 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2066 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2067 primal_dofs++; 2068 } 2069 /* get constraint info */ 2070 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2071 dual_dofs = size_of_constraint-primal_dofs; 2072 2073 if (pcbddc->dbg_flag) { 2074 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2075 } 2076 2077 if (primal_dofs > 1) { /* QR */ 2078 2079 /* copy quadrature constraints for change of basis check */ 2080 if (pcbddc->dbg_flag) { 2081 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2082 } 2083 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2084 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2085 2086 /* compute QR decomposition of constraints */ 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2090 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2091 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2092 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2093 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2094 2095 /* explictly compute R^-T */ 2096 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2097 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2098 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2099 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2100 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2101 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2102 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2103 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2104 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2105 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2106 2107 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2108 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2109 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2110 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2111 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2112 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2113 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2114 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2115 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2116 2117 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2118 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2119 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2120 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2121 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2122 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2124 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2126 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2127 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2128 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2129 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2130 2131 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2132 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2133 /* insert cols for primal dofs */ 2134 for (j=0;j<primal_dofs;j++) { 2135 start_vals = &qr_basis[j*size_of_constraint]; 2136 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2137 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2138 } 2139 /* insert cols for dual dofs */ 2140 for (j=0,k=0;j<dual_dofs;k++) { 2141 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2142 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2143 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2144 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2145 j++; 2146 } 2147 } 2148 2149 /* check change of basis */ 2150 if (pcbddc->dbg_flag) { 2151 PetscInt ii,jj; 2152 PetscBool valid_qr=PETSC_TRUE; 2153 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2154 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2155 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2156 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2157 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2158 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2159 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2160 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2161 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2162 for (jj=0;jj<size_of_constraint;jj++) { 2163 for (ii=0;ii<primal_dofs;ii++) { 2164 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2165 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2166 } 2167 } 2168 if (!valid_qr) { 2169 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2170 for (jj=0;jj<size_of_constraint;jj++) { 2171 for (ii=0;ii<primal_dofs;ii++) { 2172 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2173 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2174 } 2175 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2176 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2177 } 2178 } 2179 } 2180 } else { 2181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2182 } 2183 } 2184 } else { /* simple transformation block */ 2185 PetscInt row,col; 2186 PetscScalar val; 2187 for (j=0;j<size_of_constraint;j++) { 2188 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2189 if (!PetscBTLookup(is_primal,row)) { 2190 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2191 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2192 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2193 } else { 2194 for (k=0;k<size_of_constraint;k++) { 2195 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2196 if (row != col) { 2197 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2198 } else { 2199 val = 1.0; 2200 } 2201 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2202 } 2203 } 2204 } 2205 if (pcbddc->dbg_flag) { 2206 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2207 } 2208 } 2209 /* increment primal counter */ 2210 primal_counter += primal_dofs; 2211 } else { 2212 if (pcbddc->dbg_flag) { 2213 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2214 } 2215 } 2216 /* increment constraint counter total_counts */ 2217 total_counts += primal_dofs; 2218 } 2219 2220 /* free workspace */ 2221 if (qr_needed) { 2222 if (pcbddc->dbg_flag) { 2223 ierr = PetscFree(work);CHKERRQ(ierr); 2224 } 2225 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2226 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2227 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2228 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2229 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2230 } 2231 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2232 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2233 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2234 2235 /* assembling of global change of variable */ 2236 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2237 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2238 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2239 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2240 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2241 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2242 2243 /* nonzeros (overestimated) */ 2244 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2245 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2246 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2247 for (i=0;i<pcis->n;i++) { 2248 nnz_array[i] = 1.0; 2249 idxs_all[i] = i; 2250 } 2251 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2252 for (i=0;i<pcis->n_B;i++) { 2253 nnz_array[idxs_B[i]] = nnz[i]; 2254 } 2255 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2256 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2257 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz);CHKERRQ(ierr); 2259 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2260 ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr); 2261 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2262 for (i=0;i<local_size;i++) { 2263 nnz[i] = (PetscInt)(PetscRealPart(nnz_array[i])); 2264 } 2265 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2266 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2267 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz,0,nnz);CHKERRQ(ierr); 2268 ierr = PetscFree(nnz);CHKERRQ(ierr); 2269 2270 /* Set identity on dirichlet dofs */ 2271 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2272 for (i=0;i<pcis->n-pcis->n_B;i++) { 2273 PetscScalar one=1.0; 2274 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2275 } 2276 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2277 2278 /* Set values at interface dofs */ 2279 done = PETSC_TRUE; 2280 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2281 if (!done) { 2282 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2283 } 2284 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2285 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2286 ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2287 for (i=0;i<nvtxs;i++) { 2288 PetscInt row,*cols,ncols; 2289 PetscScalar *mat_vals; 2290 2291 row = idxs_B[i]; 2292 ncols = xadj[i+1]-xadj[i]; 2293 cols = idxs_mapped+xadj[i]; 2294 mat_vals = vals+xadj[i]; 2295 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2296 } 2297 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2298 if (!done) { 2299 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2300 } 2301 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2302 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2303 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2304 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2306 2307 /* check */ 2308 if (pcbddc->dbg_flag) { 2309 PetscReal error; 2310 Vec x,x_change; 2311 2312 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2313 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2314 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2315 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2316 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2317 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2318 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2319 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2320 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2321 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2322 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2323 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2324 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2325 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2326 ierr = VecDestroy(&x);CHKERRQ(ierr); 2327 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2328 } 2329 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2330 2331 /* 2332 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2333 ierr = MatView(local_ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2334 */ 2335 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2336 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2337 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2338 } 2339 2340 /* set up change of basis context */ 2341 if (pcbddc->ChangeOfBasisMatrix) { 2342 PCBDDCChange_ctx change_ctx; 2343 2344 if (!pcbddc->new_global_mat) { 2345 PetscInt global_size,local_size; 2346 2347 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2348 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2349 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2350 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2351 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2352 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2353 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2354 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2355 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2356 } else { 2357 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2358 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2359 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2360 } 2361 if (!pcbddc->user_ChangeOfBasisMatrix) { 2362 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2363 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2364 } else { 2365 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2366 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2367 } 2368 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2369 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2370 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2371 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2372 } 2373 2374 /* get indices in local ordering for vertices and constraints */ 2375 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2376 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2377 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2378 } 2379 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2380 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2381 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2382 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2383 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2384 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2385 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2386 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2387 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2388 /* set quantities in PCBDDC data struct */ 2389 pcbddc->n_actual_vertices = i; 2390 /* check if a new primal space has been introduced */ 2391 pcbddc->new_primal_space_local = PETSC_TRUE; 2392 if (olocal_primal_size == pcbddc->local_primal_size) { 2393 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2394 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2395 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2396 } 2397 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2398 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2399 2400 /* flush dbg viewer */ 2401 if (pcbddc->dbg_flag) { 2402 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2403 } 2404 2405 /* free workspace */ 2406 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2407 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2408 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2409 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2410 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2411 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2417 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2418 { 2419 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2420 PC_IS *pcis = (PC_IS*)pc->data; 2421 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2422 PetscInt ierr,i,vertex_size; 2423 PetscViewer viewer=pcbddc->dbg_viewer; 2424 2425 PetscFunctionBegin; 2426 /* Reset previously computed graph */ 2427 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2428 /* Init local Graph struct */ 2429 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2430 2431 /* Check validity of the csr graph passed in by the user */ 2432 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2433 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2434 } 2435 2436 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2437 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2438 Mat mat_adj; 2439 const PetscInt *xadj,*adjncy; 2440 PetscBool flg_row=PETSC_TRUE; 2441 2442 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2443 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2444 if (!flg_row) { 2445 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2446 } 2447 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2448 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2449 if (!flg_row) { 2450 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2451 } 2452 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2453 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2454 } 2455 2456 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2457 vertex_size = 1; 2458 if (pcbddc->user_provided_isfordofs) { 2459 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2460 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2461 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2462 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2463 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2464 } 2465 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2466 pcbddc->n_ISForDofs = 0; 2467 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2468 } 2469 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2470 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2471 } else { 2472 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2473 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2474 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2475 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2476 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2477 } 2478 } 2479 } 2480 2481 /* Setup of Graph */ 2482 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2483 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2484 } 2485 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2486 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2487 } 2488 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2489 2490 /* Graph's connected components analysis */ 2491 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2492 2493 /* print some info to stdout */ 2494 if (pcbddc->dbg_flag) { 2495 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2496 } 2497 2498 /* mark topography has done */ 2499 pcbddc->recompute_topography = PETSC_FALSE; 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2505 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2506 { 2507 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2508 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2509 PetscErrorCode ierr; 2510 2511 PetscFunctionBegin; 2512 n = 0; 2513 vertices = 0; 2514 if (pcbddc->ConstraintMatrix) { 2515 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2516 for (i=0;i<local_primal_size;i++) { 2517 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2518 if (size_of_constraint == 1) n++; 2519 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2520 } 2521 if (vertices_idx) { 2522 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2523 n = 0; 2524 for (i=0;i<local_primal_size;i++) { 2525 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2526 if (size_of_constraint == 1) { 2527 vertices[n++]=row_cmat_indices[0]; 2528 } 2529 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2530 } 2531 } 2532 } 2533 *n_vertices = n; 2534 if (vertices_idx) *vertices_idx = vertices; 2535 PetscFunctionReturn(0); 2536 } 2537 2538 #undef __FUNCT__ 2539 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2540 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2541 { 2542 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2543 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2544 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2545 PetscBT touched; 2546 PetscErrorCode ierr; 2547 2548 /* This function assumes that the number of local constraints per connected component 2549 is not greater than the number of nodes defined for the connected component 2550 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2551 PetscFunctionBegin; 2552 n = 0; 2553 constraints_index = 0; 2554 if (pcbddc->ConstraintMatrix) { 2555 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2556 max_size_of_constraint = 0; 2557 for (i=0;i<local_primal_size;i++) { 2558 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2559 if (size_of_constraint > 1) { 2560 n++; 2561 } 2562 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2563 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2564 } 2565 if (constraints_idx) { 2566 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2567 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2568 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2569 n = 0; 2570 for (i=0;i<local_primal_size;i++) { 2571 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2572 if (size_of_constraint > 1) { 2573 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2574 /* find first untouched local node */ 2575 j = 0; 2576 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2577 min_index = row_cmat_global_indices[j]; 2578 min_loc = j; 2579 /* search the minimum among nodes not yet touched on the connected component 2580 since there can be more than one constraint on a single cc */ 2581 for (j=1;j<size_of_constraint;j++) { 2582 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2583 min_index = row_cmat_global_indices[j]; 2584 min_loc = j; 2585 } 2586 } 2587 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2588 constraints_index[n++] = row_cmat_indices[min_loc]; 2589 } 2590 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2591 } 2592 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2593 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2594 } 2595 } 2596 *n_constraints = n; 2597 if (constraints_idx) *constraints_idx = constraints_index; 2598 PetscFunctionReturn(0); 2599 } 2600 2601 #undef __FUNCT__ 2602 #define __FUNCT__ "PCBDDCSubsetNumbering" 2603 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2604 { 2605 Vec local_vec,global_vec; 2606 IS seqis,paris; 2607 VecScatter scatter_ctx; 2608 PetscScalar *array; 2609 PetscInt *temp_global_dofs; 2610 PetscScalar globalsum; 2611 PetscInt i,j,s; 2612 PetscInt nlocals,first_index,old_index,max_local; 2613 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2614 PetscMPIInt *dof_sizes,*dof_displs; 2615 PetscBool first_found; 2616 PetscErrorCode ierr; 2617 2618 PetscFunctionBegin; 2619 /* mpi buffers */ 2620 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2621 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2622 j = ( !rank_prec_comm ? size_prec_comm : 0); 2623 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2624 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2625 /* get maximum size of subset */ 2626 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2627 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2628 max_local = 0; 2629 for (i=0;i<n_local_dofs;i++) { 2630 if (max_local < temp_global_dofs[i] ) { 2631 max_local = temp_global_dofs[i]; 2632 } 2633 } 2634 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2635 max_global++; 2636 max_local = 0; 2637 for (i=0;i<n_local_dofs;i++) { 2638 if (max_local < local_dofs[i] ) { 2639 max_local = local_dofs[i]; 2640 } 2641 } 2642 max_local++; 2643 /* allocate workspace */ 2644 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2645 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2646 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2647 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2648 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2649 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2650 /* create scatter */ 2651 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2652 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2653 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2654 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2655 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2656 /* init array */ 2657 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2658 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2659 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2660 if (local_dofs_mult) { 2661 for (i=0;i<n_local_dofs;i++) { 2662 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2663 } 2664 } else { 2665 for (i=0;i<n_local_dofs;i++) { 2666 array[local_dofs[i]]=1.0; 2667 } 2668 } 2669 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2670 /* scatter into global vec and get total number of global dofs */ 2671 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2672 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2673 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2674 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2675 /* Fill global_vec with cumulative function for global numbering */ 2676 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2677 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2678 nlocals = 0; 2679 first_index = -1; 2680 first_found = PETSC_FALSE; 2681 for (i=0;i<s;i++) { 2682 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2683 first_found = PETSC_TRUE; 2684 first_index = i; 2685 } 2686 nlocals += (PetscInt)PetscRealPart(array[i]); 2687 } 2688 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2689 if (!rank_prec_comm) { 2690 dof_displs[0]=0; 2691 for (i=1;i<size_prec_comm;i++) { 2692 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2693 } 2694 } 2695 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2696 if (first_found) { 2697 array[first_index] += (PetscScalar)nlocals; 2698 old_index = first_index; 2699 for (i=first_index+1;i<s;i++) { 2700 if (PetscRealPart(array[i]) > 0.1) { 2701 array[i] += array[old_index]; 2702 old_index = i; 2703 } 2704 } 2705 } 2706 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2707 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2708 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2709 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2710 /* get global ordering of local dofs */ 2711 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2712 if (local_dofs_mult) { 2713 for (i=0;i<n_local_dofs;i++) { 2714 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2715 } 2716 } else { 2717 for (i=0;i<n_local_dofs;i++) { 2718 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2719 } 2720 } 2721 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2722 /* free workspace */ 2723 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2724 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2725 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2726 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2727 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2728 /* return pointer to global ordering of local dofs */ 2729 *global_numbering_subset = temp_global_dofs; 2730 PetscFunctionReturn(0); 2731 } 2732 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2735 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2736 { 2737 PetscInt i,j; 2738 PetscScalar *alphas; 2739 PetscErrorCode ierr; 2740 2741 PetscFunctionBegin; 2742 /* this implements stabilized Gram-Schmidt */ 2743 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2744 for (i=0;i<n;i++) { 2745 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2746 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2747 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2748 } 2749 ierr = PetscFree(alphas);CHKERRQ(ierr); 2750 PetscFunctionReturn(0); 2751 } 2752 2753 #undef __FUNCT__ 2754 #define __FUNCT__ "MatISGetSubassemblingPattern" 2755 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2756 { 2757 Mat subdomain_adj; 2758 IS new_ranks,ranks_send_to; 2759 MatPartitioning partitioner; 2760 Mat_IS *matis; 2761 PetscInt n_neighs,*neighs,*n_shared,**shared; 2762 PetscInt prank; 2763 PetscMPIInt size,rank,color; 2764 PetscInt *xadj,*adjncy,*oldranks; 2765 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2766 PetscInt i,j,local_size,threshold=0; 2767 PetscErrorCode ierr; 2768 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2769 PetscSubcomm subcomm; 2770 2771 PetscFunctionBegin; 2772 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2773 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2774 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2775 2776 /* Get info on mapping */ 2777 matis = (Mat_IS*)(mat->data); 2778 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2779 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2780 2781 /* build local CSR graph of subdomains' connectivity */ 2782 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2783 xadj[0] = 0; 2784 xadj[1] = PetscMax(n_neighs-1,0); 2785 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2786 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2787 2788 if (threshold) { 2789 PetscInt* count,min_threshold; 2790 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2791 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2792 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2793 for (j=0;j<n_shared[i];j++) { 2794 count[shared[i][j]] += 1; 2795 } 2796 } 2797 /* adapt threshold since we dont want to lose significant connections */ 2798 min_threshold = n_neighs; 2799 for (i=1;i<n_neighs;i++) { 2800 for (j=0;j<n_shared[i];j++) { 2801 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2802 } 2803 } 2804 threshold = PetscMax(min_threshold+1,threshold); 2805 xadj[1] = 0; 2806 for (i=1;i<n_neighs;i++) { 2807 for (j=0;j<n_shared[i];j++) { 2808 if (count[shared[i][j]] < threshold) { 2809 adjncy[xadj[1]] = neighs[i]; 2810 adjncy_wgt[xadj[1]] = n_shared[i]; 2811 xadj[1]++; 2812 break; 2813 } 2814 } 2815 } 2816 ierr = PetscFree(count);CHKERRQ(ierr); 2817 } else { 2818 if (xadj[1]) { 2819 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2820 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2821 } 2822 } 2823 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2824 if (use_square) { 2825 for (i=0;i<xadj[1];i++) { 2826 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2827 } 2828 } 2829 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2830 2831 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2832 2833 /* 2834 Restrict work on active processes only. 2835 */ 2836 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2837 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2838 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2839 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2840 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2841 if (color) { 2842 ierr = PetscFree(xadj);CHKERRQ(ierr); 2843 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2844 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2845 } else { 2846 PetscInt coarsening_ratio; 2847 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2848 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2849 prank = rank; 2850 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2851 /* 2852 for (i=0;i<size;i++) { 2853 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2854 } 2855 */ 2856 for (i=0;i<xadj[1];i++) { 2857 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2858 } 2859 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2860 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2861 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2862 2863 /* Partition */ 2864 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2865 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2866 if (use_vwgt) { 2867 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2868 v_wgt[0] = local_size; 2869 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2870 } 2871 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2872 coarsening_ratio = size/n_subdomains; 2873 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2874 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2875 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2876 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2877 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2878 2879 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2880 if (contiguous) { 2881 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2882 } else { 2883 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2884 } 2885 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2886 /* clean up */ 2887 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2888 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2889 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2890 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2891 } 2892 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2893 2894 /* assemble parallel IS for sends */ 2895 i = 1; 2896 if (color) i=0; 2897 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2898 2899 /* get back IS */ 2900 *is_sends = ranks_send_to; 2901 PetscFunctionReturn(0); 2902 } 2903 2904 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2905 2906 #undef __FUNCT__ 2907 #define __FUNCT__ "MatISSubassemble" 2908 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2909 { 2910 Mat local_mat; 2911 Mat_IS *matis; 2912 IS is_sends_internal; 2913 PetscInt rows,cols; 2914 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2915 PetscBool ismatis,isdense,destroy_mat; 2916 ISLocalToGlobalMapping l2gmap; 2917 PetscInt* l2gmap_indices; 2918 const PetscInt* is_indices; 2919 MatType new_local_type; 2920 /* buffers */ 2921 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2922 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2923 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2924 /* MPI */ 2925 MPI_Comm comm,comm_n; 2926 PetscSubcomm subcomm; 2927 PetscMPIInt n_sends,n_recvs,commsize; 2928 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2929 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2930 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2931 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2932 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2933 PetscErrorCode ierr; 2934 2935 PetscFunctionBegin; 2936 /* TODO: add missing checks */ 2937 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2938 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2939 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2940 PetscValidLogicalCollectiveInt(mat,nis,7); 2941 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2942 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2943 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2944 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2945 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2946 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2947 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2948 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2949 PetscInt mrows,mcols,mnrows,mncols; 2950 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2951 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2952 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2953 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2954 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2955 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2956 } 2957 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2958 PetscValidLogicalCollectiveInt(mat,bs,0); 2959 /* prepare IS for sending if not provided */ 2960 if (!is_sends) { 2961 PetscBool pcontig = PETSC_TRUE; 2962 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2963 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2964 } else { 2965 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2966 is_sends_internal = is_sends; 2967 } 2968 2969 /* get pointer of MATIS data */ 2970 matis = (Mat_IS*)mat->data; 2971 2972 /* get comm */ 2973 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2974 2975 /* compute number of sends */ 2976 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2977 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2978 2979 /* compute number of receives */ 2980 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2981 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 2982 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2983 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2984 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2985 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2986 ierr = PetscFree(iflags);CHKERRQ(ierr); 2987 2988 /* restrict comm if requested */ 2989 subcomm = 0; 2990 destroy_mat = PETSC_FALSE; 2991 if (restrict_comm) { 2992 PetscMPIInt color,rank,subcommsize; 2993 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2994 color = 0; 2995 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 2996 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2997 subcommsize = commsize - subcommsize; 2998 /* check if reuse has been requested */ 2999 if (reuse == MAT_REUSE_MATRIX) { 3000 if (*mat_n) { 3001 PetscMPIInt subcommsize2; 3002 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3003 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3004 comm_n = PetscObjectComm((PetscObject)*mat_n); 3005 } else { 3006 comm_n = PETSC_COMM_SELF; 3007 } 3008 } else { /* MAT_INITIAL_MATRIX */ 3009 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3010 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3011 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3012 comm_n = subcomm->comm; 3013 } 3014 /* flag to destroy *mat_n if not significative */ 3015 if (color) destroy_mat = PETSC_TRUE; 3016 } else { 3017 comm_n = comm; 3018 } 3019 3020 /* prepare send/receive buffers */ 3021 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3022 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3023 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3024 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3025 if (nis) { 3026 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3027 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3028 } 3029 3030 /* Get data from local matrices */ 3031 if (!isdense) { 3032 /* TODO: See below some guidelines on how to prepare the local buffers */ 3033 /* 3034 send_buffer_vals should contain the raw values of the local matrix 3035 send_buffer_idxs should contain: 3036 - MatType_PRIVATE type 3037 - PetscInt size_of_l2gmap 3038 - PetscInt global_row_indices[size_of_l2gmap] 3039 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3040 */ 3041 } else { 3042 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3043 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3044 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 3045 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3046 send_buffer_idxs[1] = i; 3047 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3048 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3049 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3050 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3051 for (i=0;i<n_sends;i++) { 3052 ilengths_vals[is_indices[i]] = len*len; 3053 ilengths_idxs[is_indices[i]] = len+2; 3054 } 3055 } 3056 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3057 /* additional is (if any) */ 3058 if (nis) { 3059 PetscMPIInt psum; 3060 PetscInt j; 3061 for (j=0,psum=0;j<nis;j++) { 3062 PetscInt plen; 3063 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3064 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3065 psum += len+1; /* indices + lenght */ 3066 } 3067 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3068 for (j=0,psum=0;j<nis;j++) { 3069 PetscInt plen; 3070 const PetscInt *is_array_idxs; 3071 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3072 send_buffer_idxs_is[psum] = plen; 3073 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3074 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3075 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3076 psum += plen+1; /* indices + lenght */ 3077 } 3078 for (i=0;i<n_sends;i++) { 3079 ilengths_idxs_is[is_indices[i]] = psum; 3080 } 3081 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3082 } 3083 3084 buf_size_idxs = 0; 3085 buf_size_vals = 0; 3086 buf_size_idxs_is = 0; 3087 for (i=0;i<n_recvs;i++) { 3088 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3089 buf_size_vals += (PetscInt)olengths_vals[i]; 3090 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3091 } 3092 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3093 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3094 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3095 3096 /* get new tags for clean communications */ 3097 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3098 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3099 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3100 3101 /* allocate for requests */ 3102 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3103 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3104 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3105 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3106 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3107 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3108 3109 /* communications */ 3110 ptr_idxs = recv_buffer_idxs; 3111 ptr_vals = recv_buffer_vals; 3112 ptr_idxs_is = recv_buffer_idxs_is; 3113 for (i=0;i<n_recvs;i++) { 3114 source_dest = onodes[i]; 3115 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3116 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3117 ptr_idxs += olengths_idxs[i]; 3118 ptr_vals += olengths_vals[i]; 3119 if (nis) { 3120 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3121 ptr_idxs_is += olengths_idxs_is[i]; 3122 } 3123 } 3124 for (i=0;i<n_sends;i++) { 3125 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3126 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3127 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3128 if (nis) { 3129 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3130 } 3131 } 3132 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3133 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3134 3135 /* assemble new l2g map */ 3136 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3137 ptr_idxs = recv_buffer_idxs; 3138 buf_size_idxs = 0; 3139 for (i=0;i<n_recvs;i++) { 3140 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3141 ptr_idxs += olengths_idxs[i]; 3142 } 3143 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 3144 ptr_idxs = recv_buffer_idxs; 3145 buf_size_idxs = 0; 3146 for (i=0;i<n_recvs;i++) { 3147 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3148 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3149 ptr_idxs += olengths_idxs[i]; 3150 } 3151 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3152 ierr = ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3153 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3154 3155 /* infer new local matrix type from received local matrices type */ 3156 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3157 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3158 if (n_recvs) { 3159 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3160 ptr_idxs = recv_buffer_idxs; 3161 for (i=0;i<n_recvs;i++) { 3162 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3163 new_local_type_private = MATAIJ_PRIVATE; 3164 break; 3165 } 3166 ptr_idxs += olengths_idxs[i]; 3167 } 3168 switch (new_local_type_private) { 3169 case MATDENSE_PRIVATE: 3170 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3171 new_local_type = MATSEQAIJ; 3172 bs = 1; 3173 } else { /* if I receive only 1 dense matrix */ 3174 new_local_type = MATSEQDENSE; 3175 bs = 1; 3176 } 3177 break; 3178 case MATAIJ_PRIVATE: 3179 new_local_type = MATSEQAIJ; 3180 bs = 1; 3181 break; 3182 case MATBAIJ_PRIVATE: 3183 new_local_type = MATSEQBAIJ; 3184 break; 3185 case MATSBAIJ_PRIVATE: 3186 new_local_type = MATSEQSBAIJ; 3187 break; 3188 default: 3189 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3190 break; 3191 } 3192 } else { /* by default, new_local_type is seqdense */ 3193 new_local_type = MATSEQDENSE; 3194 bs = 1; 3195 } 3196 3197 /* create MATIS object if needed */ 3198 if (reuse == MAT_INITIAL_MATRIX) { 3199 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3200 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3201 } else { 3202 /* it also destroys the local matrices */ 3203 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3204 } 3205 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3206 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3207 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3208 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3209 3210 /* set values */ 3211 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3212 ptr_vals = recv_buffer_vals; 3213 ptr_idxs = recv_buffer_idxs; 3214 for (i=0;i<n_recvs;i++) { 3215 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3216 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3217 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3218 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3219 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3220 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3221 } else { 3222 /* TODO */ 3223 } 3224 ptr_idxs += olengths_idxs[i]; 3225 ptr_vals += olengths_vals[i]; 3226 } 3227 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3228 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3229 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3230 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3231 3232 #if 0 3233 if (!restrict_comm) { /* check */ 3234 Vec lvec,rvec; 3235 PetscReal infty_error; 3236 3237 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3238 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3239 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3240 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3241 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3242 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3243 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3244 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3245 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3246 } 3247 #endif 3248 3249 /* assemble new additional is (if any) */ 3250 if (nis) { 3251 PetscInt **temp_idxs,*count_is,j,psum; 3252 3253 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3254 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3255 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3256 ptr_idxs = recv_buffer_idxs_is; 3257 psum = 0; 3258 for (i=0;i<n_recvs;i++) { 3259 for (j=0;j<nis;j++) { 3260 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3261 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3262 psum += plen; 3263 ptr_idxs += plen+1; /* shift pointer to received data */ 3264 } 3265 } 3266 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3267 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3268 for (i=1;i<nis;i++) { 3269 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3270 } 3271 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3272 ptr_idxs = recv_buffer_idxs_is; 3273 for (i=0;i<n_recvs;i++) { 3274 for (j=0;j<nis;j++) { 3275 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3276 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3277 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3278 ptr_idxs += plen+1; /* shift pointer to received data */ 3279 } 3280 } 3281 for (i=0;i<nis;i++) { 3282 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3283 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3284 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3285 } 3286 ierr = PetscFree(count_is);CHKERRQ(ierr); 3287 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3288 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3289 } 3290 /* free workspace */ 3291 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3292 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3293 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3294 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3295 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3296 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3297 if (isdense) { 3298 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3299 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3300 } else { 3301 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3302 } 3303 if (nis) { 3304 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3305 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3306 } 3307 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3308 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3309 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3310 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3311 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3312 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3313 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3314 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3315 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3316 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3317 ierr = PetscFree(onodes);CHKERRQ(ierr); 3318 if (nis) { 3319 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3320 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3321 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3322 } 3323 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3324 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3325 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3326 for (i=0;i<nis;i++) { 3327 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3328 } 3329 } 3330 PetscFunctionReturn(0); 3331 } 3332 3333 /* temporary hack into ksp private data structure */ 3334 #include <petsc-private/kspimpl.h> 3335 3336 #undef __FUNCT__ 3337 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3338 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3339 { 3340 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3341 PC_IS *pcis = (PC_IS*)pc->data; 3342 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3343 MatNullSpace CoarseNullSpace=NULL; 3344 ISLocalToGlobalMapping coarse_islg; 3345 IS coarse_is,*isarray; 3346 PetscInt i,im_active=-1,active_procs=-1; 3347 PetscInt nis,nisdofs,nisneu; 3348 PC pc_temp; 3349 PCType coarse_pc_type; 3350 KSPType coarse_ksp_type; 3351 PetscBool multilevel_requested,multilevel_allowed; 3352 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3353 Mat t_coarse_mat_is; 3354 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3355 PetscMPIInt all_procs; 3356 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3357 PetscBool compute_vecs = PETSC_FALSE; 3358 PetscErrorCode ierr; 3359 3360 PetscFunctionBegin; 3361 /* Assign global numbering to coarse dofs */ 3362 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3363 compute_vecs = PETSC_TRUE; 3364 PetscInt ocoarse_size; 3365 ocoarse_size = pcbddc->coarse_size; 3366 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3367 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3368 /* see if we can avoid some work */ 3369 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3370 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3371 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3372 coarse_reuse = PETSC_FALSE; 3373 } else { /* we can safely reuse already computed coarse matrix */ 3374 coarse_reuse = PETSC_TRUE; 3375 } 3376 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3377 coarse_reuse = PETSC_FALSE; 3378 } 3379 /* reset any subassembling information */ 3380 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3381 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3382 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3383 coarse_reuse = PETSC_TRUE; 3384 } 3385 3386 /* count "active" (i.e. with positive local size) and "void" processes */ 3387 im_active = !!(pcis->n); 3388 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3389 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3390 void_procs = all_procs-active_procs; 3391 csin_type_simple = PETSC_TRUE; 3392 if (pcbddc->current_level) { 3393 csin_ml = PETSC_TRUE; 3394 ncoarse_ml = void_procs; 3395 csin_ds = PETSC_TRUE; 3396 ncoarse_ds = void_procs; 3397 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3398 } else { 3399 csin_ml = PETSC_FALSE; 3400 ncoarse_ml = all_procs; 3401 if (void_procs) { 3402 csin_ds = PETSC_TRUE; 3403 ncoarse_ds = void_procs; 3404 csin_type_simple = PETSC_FALSE; 3405 } else { 3406 csin_ds = PETSC_FALSE; 3407 ncoarse_ds = all_procs; 3408 } 3409 } 3410 3411 /* 3412 test if we can go multilevel: three conditions must be satisfied: 3413 - we have not exceeded the number of levels requested 3414 - we can actually subassemble the active processes 3415 - we can find a suitable number of MPI processes where we can place the subassembled problem 3416 */ 3417 multilevel_allowed = PETSC_FALSE; 3418 multilevel_requested = PETSC_FALSE; 3419 if (pcbddc->current_level < pcbddc->max_levels) { 3420 multilevel_requested = PETSC_TRUE; 3421 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3422 multilevel_allowed = PETSC_FALSE; 3423 } else { 3424 multilevel_allowed = PETSC_TRUE; 3425 } 3426 } 3427 /* determine number of process partecipating to coarse solver */ 3428 if (multilevel_allowed) { 3429 ncoarse = ncoarse_ml; 3430 csin = csin_ml; 3431 } else { 3432 ncoarse = ncoarse_ds; 3433 csin = csin_ds; 3434 } 3435 3436 /* creates temporary l2gmap and IS for coarse indexes */ 3437 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3438 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3439 3440 /* creates temporary MATIS object for coarse matrix */ 3441 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3442 #if 0 3443 { 3444 PetscViewer viewer; 3445 char filename[256]; 3446 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3447 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3448 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3449 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3450 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3451 } 3452 #endif 3453 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3454 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3455 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3456 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3457 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3458 3459 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3460 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3461 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3462 const PetscInt *idxs; 3463 ISLocalToGlobalMapping tmap; 3464 3465 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3466 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3467 /* allocate space for temporary storage */ 3468 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3469 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3470 /* allocate for IS array */ 3471 nisdofs = pcbddc->n_ISForDofsLocal; 3472 nisneu = !!pcbddc->NeumannBoundariesLocal; 3473 nis = nisdofs + nisneu; 3474 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3475 /* dofs splitting */ 3476 for (i=0;i<nisdofs;i++) { 3477 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3478 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3479 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3480 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3481 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3482 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3483 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3484 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3485 } 3486 /* neumann boundaries */ 3487 if (pcbddc->NeumannBoundariesLocal) { 3488 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3489 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3490 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3491 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3492 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3493 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3494 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3495 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3496 } 3497 /* free memory */ 3498 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3499 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3500 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3501 } else { 3502 nis = 0; 3503 nisdofs = 0; 3504 nisneu = 0; 3505 isarray = NULL; 3506 } 3507 /* destroy no longer needed map */ 3508 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3509 3510 /* restrict on coarse candidates (if needed) */ 3511 coarse_mat_is = NULL; 3512 if (csin) { 3513 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3514 PetscInt j,tissize,*nisindices; 3515 PetscInt *coarse_candidates; 3516 const PetscInt* tisindices; 3517 /* get coarse candidates' ranks in pc communicator */ 3518 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3519 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3520 for (i=0,j=0;i<all_procs;i++) { 3521 if (!coarse_candidates[i]) { 3522 coarse_candidates[j]=i; 3523 j++; 3524 } 3525 } 3526 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3527 /* get a suitable subassembling pattern */ 3528 if (csin_type_simple) { 3529 PetscMPIInt rank; 3530 PetscInt issize,isidx; 3531 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3532 if (im_active) { 3533 issize = 1; 3534 isidx = (PetscInt)rank; 3535 } else { 3536 issize = 0; 3537 isidx = -1; 3538 } 3539 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3540 } else { 3541 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3542 } 3543 if (pcbddc->dbg_flag) { 3544 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3545 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3546 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3547 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3548 for (i=0;i<j;i++) { 3549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3550 } 3551 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3552 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3553 } 3554 /* shift the pattern on coarse candidates */ 3555 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3556 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3557 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3558 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3559 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3560 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3561 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3562 } 3563 if (pcbddc->dbg_flag) { 3564 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3565 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3566 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3567 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3568 } 3569 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3570 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3571 } else { 3572 if (pcbddc->dbg_flag) { 3573 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3574 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3575 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3576 } 3577 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3578 coarse_mat_is = t_coarse_mat_is; 3579 } 3580 3581 /* create local to global scatters for coarse problem */ 3582 if (compute_vecs) { 3583 PetscInt lrows; 3584 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3585 if (coarse_mat_is) { 3586 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3587 } else { 3588 lrows = 0; 3589 } 3590 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3591 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3592 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3593 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3594 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3595 } 3596 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3597 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3598 3599 /* set defaults for coarse KSP and PC */ 3600 if (multilevel_allowed) { 3601 coarse_ksp_type = KSPRICHARDSON; 3602 coarse_pc_type = PCBDDC; 3603 } else { 3604 coarse_ksp_type = KSPPREONLY; 3605 coarse_pc_type = PCREDUNDANT; 3606 } 3607 3608 /* print some info if requested */ 3609 if (pcbddc->dbg_flag) { 3610 if (!multilevel_allowed) { 3611 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3612 if (multilevel_requested) { 3613 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3614 } else if (pcbddc->max_levels) { 3615 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3616 } 3617 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3618 } 3619 } 3620 3621 /* create the coarse KSP object only once with defaults */ 3622 if (coarse_mat_is) { 3623 MatReuse coarse_mat_reuse; 3624 PetscViewer dbg_viewer = NULL; 3625 if (pcbddc->dbg_flag) { 3626 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3627 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3628 } 3629 if (!pcbddc->coarse_ksp) { 3630 char prefix[256],str_level[16]; 3631 size_t len; 3632 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3633 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3634 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3635 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3636 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3637 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3638 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3639 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3640 /* prefix */ 3641 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3642 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3643 if (!pcbddc->current_level) { 3644 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3645 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3646 } else { 3647 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3648 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3649 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3650 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3651 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3652 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3653 } 3654 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3655 /* allow user customization */ 3656 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3657 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3658 } 3659 3660 /* get some info after set from options */ 3661 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3662 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3663 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3664 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3665 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3666 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3667 isbddc = PETSC_FALSE; 3668 } 3669 if (isredundant) { 3670 KSP inner_ksp; 3671 PC inner_pc; 3672 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3673 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3674 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3675 } 3676 3677 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3678 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3679 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3680 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3681 if (nisdofs) { 3682 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3683 for (i=0;i<nisdofs;i++) { 3684 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3685 } 3686 } 3687 if (nisneu) { 3688 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3689 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3690 } 3691 3692 /* assemble coarse matrix */ 3693 if (coarse_reuse) { 3694 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3695 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3696 coarse_mat_reuse = MAT_REUSE_MATRIX; 3697 } else { 3698 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3699 } 3700 if (isbddc || isnn) { 3701 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3702 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3703 if (pcbddc->dbg_flag) { 3704 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3705 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3706 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3707 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3708 } 3709 } 3710 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3711 } else { 3712 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3713 } 3714 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3715 3716 /* propagate symmetry info to coarse matrix */ 3717 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3718 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3719 3720 /* set operators */ 3721 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3722 if (pcbddc->dbg_flag) { 3723 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3724 } 3725 } else { /* processes non partecipating to coarse solver (if any) */ 3726 coarse_mat = 0; 3727 } 3728 ierr = PetscFree(isarray);CHKERRQ(ierr); 3729 #if 0 3730 { 3731 PetscViewer viewer; 3732 char filename[256]; 3733 sprintf(filename,"coarse_mat.m"); 3734 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3735 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3736 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3737 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3738 } 3739 #endif 3740 3741 /* Compute coarse null space (special handling by BDDC only) */ 3742 if (pcbddc->NullSpace) { 3743 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3744 } 3745 3746 if (pcbddc->coarse_ksp) { 3747 Vec crhs,csol; 3748 PetscBool ispreonly; 3749 if (CoarseNullSpace) { 3750 if (isbddc) { 3751 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3752 } else { 3753 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3754 } 3755 } 3756 /* setup coarse ksp */ 3757 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3758 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3759 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3760 /* hack */ 3761 if (!csol) { 3762 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3763 } 3764 if (!crhs) { 3765 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3766 } 3767 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3768 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3769 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3770 KSP check_ksp; 3771 KSPType check_ksp_type; 3772 PC check_pc; 3773 Vec check_vec,coarse_vec; 3774 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3775 PetscInt its; 3776 PetscBool compute_eigs; 3777 PetscReal *eigs_r,*eigs_c; 3778 PetscInt neigs; 3779 const char *prefix; 3780 3781 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3782 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3783 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3784 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3785 if (ispreonly) { 3786 check_ksp_type = KSPPREONLY; 3787 compute_eigs = PETSC_FALSE; 3788 } else { 3789 check_ksp_type = KSPGMRES; 3790 compute_eigs = PETSC_TRUE; 3791 } 3792 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3793 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3794 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3795 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3796 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3797 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3798 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3799 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3800 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3801 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3802 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3803 /* create random vec */ 3804 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3805 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3806 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3807 if (CoarseNullSpace) { 3808 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3809 } 3810 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3811 /* solve coarse problem */ 3812 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3813 if (CoarseNullSpace) { 3814 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3815 } 3816 /* set eigenvalue estimation if preonly has not been requested */ 3817 if (compute_eigs) { 3818 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3819 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3820 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3821 lambda_max = eigs_r[neigs-1]; 3822 lambda_min = eigs_r[0]; 3823 if (pcbddc->use_coarse_estimates) { 3824 if (lambda_max>lambda_min) { 3825 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3826 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3827 } 3828 } 3829 } 3830 3831 /* check coarse problem residual error */ 3832 if (pcbddc->dbg_flag) { 3833 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3834 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3835 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3836 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3837 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3838 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3839 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3840 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3841 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3842 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3843 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3844 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3845 if (compute_eigs) { 3846 PetscReal lambda_max_s,lambda_min_s; 3847 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 3848 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3849 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3850 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3851 for (i=0;i<neigs;i++) { 3852 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3853 } 3854 } 3855 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3856 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3857 } 3858 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3859 if (compute_eigs) { 3860 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3861 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3862 } 3863 } 3864 } 3865 /* print additional info */ 3866 if (pcbddc->dbg_flag) { 3867 /* waits until all processes reaches this point */ 3868 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3870 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3871 } 3872 3873 /* free memory */ 3874 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3875 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3876 PetscFunctionReturn(0); 3877 } 3878 3879 #undef __FUNCT__ 3880 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3881 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3882 { 3883 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3884 PC_IS* pcis = (PC_IS*)pc->data; 3885 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3886 PetscInt i,coarse_size; 3887 PetscInt *local_primal_indices; 3888 PetscErrorCode ierr; 3889 3890 PetscFunctionBegin; 3891 /* Compute global number of coarse dofs */ 3892 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3893 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3894 } 3895 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3896 3897 /* check numbering */ 3898 if (pcbddc->dbg_flag) { 3899 PetscScalar coarsesum,*array; 3900 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 3901 3902 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3903 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3904 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3905 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3906 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3907 for (i=0;i<pcbddc->local_primal_size;i++) { 3908 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3909 } 3910 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3911 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3912 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3913 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3914 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3915 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3916 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3917 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3918 for (i=0;i<pcis->n;i++) { 3919 if (array[i] == 1.0) { 3920 set_error = PETSC_TRUE; 3921 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3922 } 3923 } 3924 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3925 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3926 for (i=0;i<pcis->n;i++) { 3927 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3928 } 3929 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3930 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3931 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3932 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3933 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3934 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3935 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 3936 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3938 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3939 for (i=0;i<pcbddc->local_primal_size;i++) { 3940 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3941 } 3942 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3943 } 3944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3945 if (set_error_reduced) { 3946 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 3947 } 3948 } 3949 /* get back data */ 3950 *coarse_size_n = coarse_size; 3951 *local_primal_indices_n = local_primal_indices; 3952 PetscFunctionReturn(0); 3953 } 3954 3955 #undef __FUNCT__ 3956 #define __FUNCT__ "PCBDDCGlobalToLocal" 3957 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 3958 { 3959 IS localis_t; 3960 PetscInt i,lsize,*idxs,n; 3961 PetscScalar *vals; 3962 PetscErrorCode ierr; 3963 3964 PetscFunctionBegin; 3965 /* get indices in local ordering exploiting local to global map */ 3966 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3967 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3968 for (i=0;i<lsize;i++) vals[i] = 1.0; 3969 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3970 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 3971 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 3972 if (idxs) { /* multilevel guard */ 3973 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3974 } 3975 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 3976 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3977 ierr = PetscFree(vals);CHKERRQ(ierr); 3978 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 3979 /* now compute set in local ordering */ 3980 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3981 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3982 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3983 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 3984 for (i=0,lsize=0;i<n;i++) { 3985 if (PetscRealPart(vals[i]) > 0.5) { 3986 lsize++; 3987 } 3988 } 3989 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3990 for (i=0,lsize=0;i<n;i++) { 3991 if (PetscRealPart(vals[i]) > 0.5) { 3992 idxs[lsize++] = i; 3993 } 3994 } 3995 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 3996 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3997 *localis = localis_t; 3998 PetscFunctionReturn(0); 3999 } 4000 4001 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4002 #undef __FUNCT__ 4003 #define __FUNCT__ "PCBDDCMatMult_Private" 4004 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4005 { 4006 PCBDDCChange_ctx change_ctx; 4007 PetscErrorCode ierr; 4008 4009 PetscFunctionBegin; 4010 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4011 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4012 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4013 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4014 PetscFunctionReturn(0); 4015 } 4016 4017 #undef __FUNCT__ 4018 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4019 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4020 { 4021 PCBDDCChange_ctx change_ctx; 4022 PetscErrorCode ierr; 4023 4024 PetscFunctionBegin; 4025 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4026 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4027 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4028 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4029 PetscFunctionReturn(0); 4030 } 4031