1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCSetUpSolvers" 10 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscScalar *coarse_submat_vals; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 /* Compute matrix after change of basis and extract local submatrices */ 18 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D */ 21 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 22 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 23 24 /* Setup local solvers ksp_D and ksp_R */ 25 /* PCBDDCSetUpLocalScatters should be called first! */ 26 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 27 28 /* Change global null space passed in by the user if change of basis has been requested */ 29 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 30 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 31 } 32 33 /* 34 Setup local correction and local part of coarse basis. 35 Gives back the dense local part of the coarse matrix in column major ordering 36 */ 37 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 38 39 /* Compute total number of coarse nodes and setup coarse solver */ 40 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 41 42 /* free */ 43 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 #undef __FUNCT__ 48 #define __FUNCT__ "PCBDDCResetCustomization" 49 PetscErrorCode PCBDDCResetCustomization(PC pc) 50 { 51 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 57 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 61 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 62 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 63 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 64 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 65 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCResetTopography" 71 PetscErrorCode PCBDDCResetTopography(PC pc) 72 { 73 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 78 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 79 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 80 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 81 PetscFunctionReturn(0); 82 } 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "PCBDDCResetSolvers" 86 PetscErrorCode PCBDDCResetSolvers(PC pc) 87 { 88 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 93 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 101 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 109 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 110 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 111 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 112 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 113 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 114 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 115 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 121 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 122 { 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PC_IS *pcis = (PC_IS*)pc->data; 125 VecType impVecType; 126 PetscInt n_constraints,n_R,old_size; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 if (!pcbddc->ConstraintMatrix) { 131 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 132 } 133 /* get sizes */ 134 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 135 n_R = pcis->n-pcbddc->n_actual_vertices; 136 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 137 /* local work vectors (try to avoid unneeded work)*/ 138 /* R nodes */ 139 old_size = -1; 140 if (pcbddc->vec1_R) { 141 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 142 } 143 if (n_R != old_size) { 144 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 146 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 147 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 148 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 149 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 150 } 151 /* local primal dofs */ 152 old_size = -1; 153 if (pcbddc->vec1_P) { 154 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 155 } 156 if (pcbddc->local_primal_size != old_size) { 157 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 158 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 159 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 160 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 161 } 162 /* local explicit constraints */ 163 old_size = -1; 164 if (pcbddc->vec1_C) { 165 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 166 } 167 if (n_constraints && n_constraints != old_size) { 168 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 169 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 170 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 171 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNCT__ 177 #define __FUNCT__ "PCBDDCSetUpCorrection" 178 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 179 { 180 PetscErrorCode ierr; 181 /* pointers to pcis and pcbddc */ 182 PC_IS* pcis = (PC_IS*)pc->data; 183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 184 /* submatrices of local problem */ 185 Mat A_RV,A_VR,A_VV; 186 /* working matrices */ 187 Mat M1,M2,M3,C_CR; 188 /* working vectors */ 189 Vec vec1_C,vec2_C,vec1_V,vec2_V; 190 /* additional working stuff */ 191 IS is_aux; 192 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 193 const PetscScalar *array,*row_cmat_values; 194 const PetscInt *row_cmat_indices,*idx_R_local; 195 PetscInt *idx_V_B,*auxindices; 196 PetscInt n_vertices,n_constraints,size_of_constraint; 197 PetscInt i,j,n_R,n_D,n_B; 198 PetscBool unsymmetric_check; 199 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 200 MatType impMatType; 201 /* some shortcuts to scalars */ 202 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 203 /* for debugging purposes */ 204 PetscReal *coarsefunctions_errors,*constraints_errors; 205 206 PetscFunctionBegin; 207 /* get number of vertices (corners plus constraints with change of basis) 208 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 209 n_vertices = pcbddc->n_actual_vertices; 210 n_constraints = pcbddc->local_primal_size-n_vertices; 211 /* Set Non-overlapping dimensions */ 212 n_B = pcis->n_B; n_D = pcis->n - n_B; 213 n_R = pcis->n-n_vertices; 214 215 /* Set types for local objects needed by BDDC precondtioner */ 216 impMatType = MATSEQDENSE; 217 218 /* Allocating some extra storage just to be safe */ 219 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 220 for (i=0;i<pcis->n;i++) auxindices[i]=i; 221 222 /* vertices in boundary numbering */ 223 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 224 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 225 if (i != n_vertices) { 226 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 227 } 228 229 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 230 if (n_constraints) { 231 /* see if we can save some allocations */ 232 if (pcbddc->local_auxmat2) { 233 PetscInt on_R,on_constraints; 234 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 235 if (on_R != n_R || on_constraints != n_constraints) { 236 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 237 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 238 } 239 } 240 /* work vectors */ 241 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 242 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 243 /* auxiliary matrices */ 244 if (!pcbddc->local_auxmat2) { 245 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 246 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 247 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 248 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 249 } 250 251 /* Extract constraints on R nodes: C_{CR} */ 252 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 253 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 254 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 255 256 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 257 for (i=0;i<n_constraints;i++) { 258 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 259 /* Get row of constraint matrix in R numbering */ 260 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 262 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 263 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 264 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 265 /* Solve for row of constraint matrix in R numbering */ 266 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 267 /* Set values in local_auxmat2 */ 268 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 270 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 271 } 272 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 274 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 275 276 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 277 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 278 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 279 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 280 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 281 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 282 ierr = MatSetUp(M1);CHKERRQ(ierr); 283 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 284 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 285 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 286 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 287 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 288 ierr = MatDestroy(&M2);CHKERRQ(ierr); 289 ierr = MatDestroy(&M3);CHKERRQ(ierr); 290 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 291 if (!pcbddc->local_auxmat1) { 292 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } else { 294 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 295 } 296 } 297 298 /* Get submatrices from subdomain matrix */ 299 if (n_vertices) { 300 PetscInt ibs,mbs; 301 PetscBool issbaij; 302 Mat newmat; 303 304 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 305 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 306 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 307 if (ibs != mbs) { /* need to convert to SEQAIJ */ 308 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 310 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 311 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 313 } else { 314 /* this is safe */ 315 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 316 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 317 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 318 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 319 /* which of the two approaches is faster? */ 320 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 322 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 324 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 325 } else { 326 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 327 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 328 } 329 } 330 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 331 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 332 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 333 } 334 335 /* Matrix of coarse basis functions (local) */ 336 if (pcbddc->coarse_phi_B) { 337 PetscInt on_B,on_primal; 338 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 339 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 340 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 341 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 342 } 343 } 344 if (pcbddc->coarse_phi_D) { 345 PetscInt on_D,on_primal; 346 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 347 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 348 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 349 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 350 } 351 } 352 if (!pcbddc->coarse_phi_B) { 353 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 355 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 356 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 357 } 358 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 359 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 361 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 362 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 363 } 364 365 if (pcbddc->dbg_flag) { 366 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 367 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 368 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 369 } 370 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 371 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 372 373 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 374 375 /* vertices */ 376 for (i=0;i<n_vertices;i++) { 377 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 378 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 379 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 380 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 381 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 382 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 383 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 384 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 387 if (n_constraints) { 388 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 389 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 390 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 391 } 392 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 393 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 394 395 /* Set values in coarse basis function and subdomain part of coarse_mat */ 396 /* coarse basis functions */ 397 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 398 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 399 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 402 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 403 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 404 if (pcbddc->switch_static || pcbddc->dbg_flag) { 405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 } 411 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 412 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 415 if (n_constraints) { 416 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 419 } 420 421 /* check */ 422 if (pcbddc->dbg_flag) { 423 /* assemble subdomain vector on local nodes */ 424 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 425 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 if (n_R) { 427 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 428 } 429 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 430 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 432 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 433 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 434 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 435 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 436 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 437 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 438 if (n_constraints) { 439 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 440 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 442 } 443 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 444 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 445 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 446 /* check saddle point solution */ 447 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 448 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 449 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 450 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 451 /* shift by the identity matrix */ 452 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 453 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 454 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 455 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 456 } 457 } 458 459 /* constraints */ 460 for (i=0;i<n_constraints;i++) { 461 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 462 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 463 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 464 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 465 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 466 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 467 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 468 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 469 if (n_vertices) { 470 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 471 } 472 /* Set values in coarse basis function and subdomain part of coarse_mat */ 473 /* coarse basis functions */ 474 j = i+n_vertices; /* don't touch this! */ 475 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 476 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 481 if (pcbddc->switch_static || pcbddc->dbg_flag) { 482 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 483 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 484 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 485 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 487 } 488 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 489 if (n_vertices) { 490 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 491 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 492 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 493 } 494 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 495 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 497 498 if (pcbddc->dbg_flag) { 499 /* assemble subdomain vector on nodes */ 500 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 501 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 502 if (n_R) { 503 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 504 } 505 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 506 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 507 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 508 /* assemble subdomain vector of lagrange multipliers */ 509 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 510 if (n_vertices) { 511 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 512 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 513 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 514 } 515 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 516 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 517 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 518 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 519 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 521 /* check saddle point solution */ 522 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 523 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 524 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 525 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 526 /* shift by the identity matrix */ 527 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 528 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 529 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 530 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 531 } 532 } 533 /* call assembling routines for local coarse basis */ 534 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 535 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 if (pcbddc->switch_static || pcbddc->dbg_flag) { 537 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 538 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 539 } 540 541 /* compute other basis functions for non-symmetric problems */ 542 /*ierr = MatIsSymmetric(pc->pmat,0.,&pcbddc->issym);CHKERRQ(ierr);*/ 543 { /* this is a temporary workaround since seqbaij matrices does not have support for symmetry checking */ 544 PetscBool setsym; 545 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&pcbddc->issym);CHKERRQ(ierr); 546 if (!setsym) pcbddc->issym = PETSC_FALSE; 547 } 548 549 if (!pcbddc->issym) { 550 if (!pcbddc->coarse_psi_B) { 551 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 552 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 553 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 554 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 555 } 556 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 557 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 558 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 560 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 561 } 562 for (i=0;i<pcbddc->local_primal_size;i++) { 563 if (n_constraints) { 564 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 565 for (j=0;j<n_constraints;j++) { 566 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 567 } 568 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 569 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 570 } 571 if (i<n_vertices) { 572 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 573 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 574 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 575 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 576 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 577 if (n_constraints) { 578 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 579 } 580 } else { 581 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 582 } 583 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 584 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 585 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 586 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 587 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 588 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 589 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 590 if (i<n_vertices) { 591 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 592 } 593 if (pcbddc->switch_static || pcbddc->dbg_flag) { 594 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 595 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 596 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 597 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 598 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 599 } 600 601 if (pcbddc->dbg_flag) { 602 /* assemble subdomain vector on nodes */ 603 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 604 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 605 if (n_R) { 606 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 607 } 608 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 609 if (i<n_vertices) { 610 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 611 } 612 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 614 /* assemble subdomain vector of lagrange multipliers */ 615 for (j=0;j<pcbddc->local_primal_size;j++) { 616 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 617 } 618 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 619 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 620 /* check saddle point solution */ 621 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 622 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 623 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 624 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 625 /* shift by the identity matrix */ 626 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 627 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 628 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 629 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 630 } 631 } 632 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 633 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 634 if (pcbddc->switch_static || pcbddc->dbg_flag) { 635 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 636 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 637 } 638 unsymmetric_check = PETSC_TRUE; 639 } else { /* take references to already computed coarse basis */ 640 unsymmetric_check = PETSC_FALSE; 641 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 642 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 643 if (pcbddc->coarse_phi_D) { 644 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 645 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 646 } 647 } 648 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 649 /* Checking coarse_sub_mat and coarse basis functios */ 650 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 651 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 652 if (pcbddc->dbg_flag) { 653 Mat coarse_sub_mat; 654 Mat AUXMAT,TM1,TM2,TM3,TM4; 655 Mat coarse_phi_D,coarse_phi_B; 656 Mat coarse_psi_D,coarse_psi_B; 657 Mat A_II,A_BB,A_IB,A_BI; 658 MatType checkmattype=MATSEQAIJ; 659 PetscReal real_value; 660 661 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 662 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 663 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 664 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 665 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 666 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 667 if (unsymmetric_check) { 668 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 669 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 670 } 671 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 672 673 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 674 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 676 if (unsymmetric_check) { 677 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 678 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 680 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 681 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 683 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 684 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 685 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 686 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 687 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 688 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 689 } else { 690 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 691 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 692 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 693 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 694 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 695 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 696 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 697 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 698 } 699 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 700 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 701 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 702 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 703 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 704 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 705 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 709 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 710 for (i=0;i<pcbddc->local_primal_size;i++) { 711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 712 } 713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 714 for (i=0;i<pcbddc->local_primal_size;i++) { 715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 716 } 717 if (unsymmetric_check) { 718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 719 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 720 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 721 } 722 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 723 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 724 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 725 } 726 } 727 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 730 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 731 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 732 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 733 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 734 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 735 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 736 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 737 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 738 if (unsymmetric_check) { 739 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 740 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 741 } 742 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 743 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 744 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 745 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 746 } 747 /* free memory */ 748 if (n_vertices) { 749 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 750 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 751 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 752 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 753 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 754 } 755 if (n_constraints) { 756 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 757 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 758 ierr = MatDestroy(&M1);CHKERRQ(ierr); 759 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 760 } 761 ierr = PetscFree(auxindices);CHKERRQ(ierr); 762 /* get back data */ 763 *coarse_submat_vals_n = coarse_submat_vals; 764 PetscFunctionReturn(0); 765 } 766 767 #undef __FUNCT__ 768 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 769 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 770 { 771 PC_IS* pcis = (PC_IS*)(pc->data); 772 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 773 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 774 PetscBool issbaij,isseqaij; 775 /* manage repeated solves */ 776 MatReuse reuse; 777 PetscErrorCode ierr; 778 779 PetscFunctionBegin; 780 if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) { 781 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 782 } 783 /* get mat flags */ 784 reuse = MAT_INITIAL_MATRIX; 785 if (pc->setupcalled) { 786 if (pc->flag == SAME_NONZERO_PATTERN) { 787 reuse = MAT_REUSE_MATRIX; 788 } else { 789 reuse = MAT_INITIAL_MATRIX; 790 } 791 } 792 if (reuse == MAT_INITIAL_MATRIX) { 793 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 794 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 795 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 796 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 797 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 798 } 799 800 /* transform local matrices if needed */ 801 if (pcbddc->ChangeOfBasisMatrix) { 802 Mat *change_mat_all; 803 IS is_local,is_global; 804 PetscBool sorted; 805 PetscInt *idxs_perm; 806 807 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_local);CHKERRQ(ierr); 808 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 809 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 810 ierr = ISSorted(is_global,&sorted);CHKERRQ(ierr); 811 if (!sorted) { 812 const PetscInt *idxs; 813 PetscInt *idxs_sorted,i; 814 815 ierr = PetscMalloc1(pcis->n,&idxs_perm);CHKERRQ(ierr); 816 ierr = PetscMalloc1(pcis->n,&idxs_sorted);CHKERRQ(ierr); 817 for (i=0;i<pcis->n;i++) { 818 idxs_perm[i] = i; 819 } 820 ierr = ISGetIndices(is_global,&idxs);CHKERRQ(ierr); 821 ierr = PetscSortIntWithPermutation(pcis->n,idxs,idxs_perm);CHKERRQ(ierr); 822 for (i=0;i<pcis->n;i++) { 823 idxs_sorted[i] = idxs[idxs_perm[i]]; 824 } 825 ierr = ISRestoreIndices(is_global,&idxs);CHKERRQ(ierr); 826 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 827 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs_sorted,PETSC_OWN_POINTER,&is_global);CHKERRQ(ierr); 828 } 829 830 /* get change of basis on the whole set of local dofs */ 831 ierr = MatGetSubMatrices(pcbddc->ChangeOfBasisMatrix,1,&is_global,&is_global,MAT_INITIAL_MATRIX,&change_mat_all);CHKERRQ(ierr); 832 833 if (!sorted) { 834 Mat new_mat; 835 IS is_perm; 836 PetscInt *idxs,i; 837 838 ierr = PetscMalloc1(pcis->n,&idxs);CHKERRQ(ierr); 839 for (i=0;i<pcis->n;i++) { 840 idxs[idxs_perm[i]] = i; 841 } 842 ierr = PetscFree(idxs_perm);CHKERRQ(ierr); 843 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcis->n,idxs,PETSC_OWN_POINTER,&is_perm);CHKERRQ(ierr); 844 ierr = ISSetPermutation(is_perm);CHKERRQ(ierr); 845 ierr = MatPermute(change_mat_all[0],is_perm,is_perm,&new_mat);CHKERRQ(ierr); 846 ierr = MatDestroy(&change_mat_all[0]);CHKERRQ(ierr); 847 change_mat_all[0] = new_mat; 848 ierr = ISDestroy(&is_perm);CHKERRQ(ierr); 849 } 850 851 /* check */ 852 if (pcbddc->dbg_flag) { 853 Vec x,x_change; 854 PetscReal error; 855 856 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 857 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 858 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 859 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 860 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = MatMult(change_mat_all[0],pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 863 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 864 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 865 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 866 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 867 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 870 ierr = VecDestroy(&x);CHKERRQ(ierr); 871 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 872 } 873 874 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 875 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 876 if (isseqaij) { 877 ierr = MatPtAP(matis->A,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 878 } else { 879 Mat work_mat; 880 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 881 ierr = MatPtAP(work_mat,change_mat_all[0],reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 882 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 883 } 884 /* 885 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 886 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 887 */ 888 ierr = MatDestroyMatrices(1,&change_mat_all);CHKERRQ(ierr); 889 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 890 } else { 891 /* without change of basis, the local matrix is unchanged */ 892 if (!pcbddc->local_mat) { 893 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 894 pcbddc->local_mat = matis->A; 895 } 896 } 897 898 /* get submatrices */ 899 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 900 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 901 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 902 if (!issbaij) { 903 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 904 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 905 } else { 906 Mat newmat; 907 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 908 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 909 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 910 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 917 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 918 { 919 PC_IS* pcis = (PC_IS*)(pc->data); 920 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 921 IS is_aux1,is_aux2; 922 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 923 PetscInt n_vertices,i,j,n_R,n_D,n_B; 924 PetscInt vbs,bs; 925 PetscBT bitmask; 926 PetscErrorCode ierr; 927 928 PetscFunctionBegin; 929 /* 930 No need to setup local scatters if 931 - primal space is unchanged 932 AND 933 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 934 AND 935 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 936 */ 937 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 938 PetscFunctionReturn(0); 939 } 940 /* destroy old objects */ 941 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 942 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 943 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 944 /* Set Non-overlapping dimensions */ 945 n_B = pcis->n_B; n_D = pcis->n - n_B; 946 n_vertices = pcbddc->n_actual_vertices; 947 /* create auxiliary bitmask */ 948 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 949 for (i=0;i<n_vertices;i++) { 950 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 951 } 952 953 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 954 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 955 for (i=0, n_R=0; i<pcis->n; i++) { 956 if (!PetscBTLookup(bitmask,i)) { 957 idx_R_local[n_R] = i; 958 n_R++; 959 } 960 } 961 962 /* Block code */ 963 vbs = 1; 964 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 965 if (bs>1 && !(n_vertices%bs)) { 966 PetscBool is_blocked = PETSC_TRUE; 967 PetscInt *vary; 968 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 969 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 970 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 971 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 972 for (i=0; i<n_vertices; i++) { 973 if (vary[i]!=0 && vary[i]!=bs) { 974 is_blocked = PETSC_FALSE; 975 break; 976 } 977 } 978 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 979 vbs = bs; 980 for (i=0;i<n_R/vbs;i++) { 981 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 982 } 983 } 984 ierr = PetscFree(vary);CHKERRQ(ierr); 985 } 986 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 987 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 988 989 /* print some info if requested */ 990 if (pcbddc->dbg_flag) { 991 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 993 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 999 } 1000 1001 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1002 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1003 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 1004 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 1005 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1006 for (i=0; i<n_D; i++) { 1007 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1008 } 1009 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1010 for (i=0, j=0; i<n_R; i++) { 1011 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1012 aux_array1[j++] = i; 1013 } 1014 } 1015 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1016 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1017 for (i=0, j=0; i<n_B; i++) { 1018 if (!PetscBTLookup(bitmask,is_indices[i])) { 1019 aux_array2[j++] = i; 1020 } 1021 } 1022 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1023 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1024 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1025 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1026 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1027 1028 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1029 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1030 for (i=0, j=0; i<n_R; i++) { 1031 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1032 aux_array1[j++] = i; 1033 } 1034 } 1035 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1036 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1037 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1038 } 1039 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1040 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1047 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1048 { 1049 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1050 PC_IS *pcis = (PC_IS*)pc->data; 1051 PC pc_temp; 1052 Mat A_RR; 1053 MatReuse reuse; 1054 PetscScalar m_one = -1.0; 1055 PetscReal value; 1056 PetscInt n_D,n_R,ibs,mbs; 1057 PetscBool use_exact,use_exact_reduced,issbaij; 1058 PetscErrorCode ierr; 1059 /* prefixes stuff */ 1060 char dir_prefix[256],neu_prefix[256],str_level[16]; 1061 size_t len; 1062 1063 PetscFunctionBegin; 1064 1065 /* compute prefixes */ 1066 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1067 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1068 if (!pcbddc->current_level) { 1069 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1070 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1071 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1072 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1073 } else { 1074 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1075 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1076 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1077 len -= 15; /* remove "pc_bddc_coarse_" */ 1078 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1079 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1080 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1081 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1082 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1083 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1084 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1085 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1086 } 1087 1088 /* DIRICHLET PROBLEM */ 1089 /* Matrix for Dirichlet problem is pcis->A_II */ 1090 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1091 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1092 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1093 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1094 /* default */ 1095 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1096 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1097 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1098 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1099 if (issbaij) { 1100 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1101 } else { 1102 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1103 } 1104 /* Allow user's customization */ 1105 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1106 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1107 } 1108 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1109 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1110 if (!n_D) { 1111 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1112 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1113 } 1114 /* Set Up KSP for Dirichlet problem of BDDC */ 1115 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1116 /* set ksp_D into pcis data */ 1117 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1118 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1119 pcis->ksp_D = pcbddc->ksp_D; 1120 1121 /* NEUMANN PROBLEM */ 1122 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1123 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1124 if (pcbddc->ksp_R) { /* already created ksp */ 1125 PetscInt nn_R; 1126 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1127 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1128 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1129 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1130 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1131 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1132 reuse = MAT_INITIAL_MATRIX; 1133 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1134 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1135 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1136 reuse = MAT_INITIAL_MATRIX; 1137 } else { /* safe to reuse the matrix */ 1138 reuse = MAT_REUSE_MATRIX; 1139 } 1140 } 1141 /* last check */ 1142 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1143 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1144 reuse = MAT_INITIAL_MATRIX; 1145 } 1146 } else { /* first time, so we need to create the matrix */ 1147 reuse = MAT_INITIAL_MATRIX; 1148 } 1149 /* extract A_RR */ 1150 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1151 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1152 if (ibs != mbs) { 1153 Mat newmat; 1154 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1155 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1156 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1157 } else { 1158 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1159 } 1160 if (!pcbddc->ksp_R) { /* create object if not present */ 1161 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1162 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1163 /* default */ 1164 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1165 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1166 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1167 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1168 if (issbaij) { 1169 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1170 } else { 1171 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1172 } 1173 /* Allow user's customization */ 1174 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1175 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1176 } 1177 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1178 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1179 if (!n_R) { 1180 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1181 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1182 } 1183 /* Set Up KSP for Neumann problem of BDDC */ 1184 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1185 1186 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1187 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1188 /* Dirichlet */ 1189 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1190 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1191 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1192 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1193 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1194 /* need to be adapted? */ 1195 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1196 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1197 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1198 /* print info */ 1199 if (pcbddc->dbg_flag) { 1200 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1201 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1202 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1203 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1204 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1205 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1206 } 1207 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1208 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1209 } 1210 1211 /* Neumann */ 1212 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1213 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1214 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1215 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1216 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1217 /* need to be adapted? */ 1218 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1219 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1220 /* print info */ 1221 if (pcbddc->dbg_flag) { 1222 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1223 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1224 } 1225 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1226 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1227 } 1228 } 1229 /* free Neumann problem's matrix */ 1230 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1236 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1237 { 1238 PetscErrorCode ierr; 1239 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1240 1241 PetscFunctionBegin; 1242 if (applytranspose) { 1243 if (pcbddc->local_auxmat1) { 1244 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1245 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1246 } 1247 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1248 } else { 1249 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1250 if (pcbddc->local_auxmat1) { 1251 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1252 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1253 } 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1261 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1262 { 1263 PetscErrorCode ierr; 1264 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1265 PC_IS* pcis = (PC_IS*) (pc->data); 1266 const PetscScalar zero = 0.0; 1267 1268 PetscFunctionBegin; 1269 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1270 if (applytranspose) { 1271 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1272 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1273 } else { 1274 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1275 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1276 } 1277 /* start communications from local primal nodes to rhs of coarse solver */ 1278 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1279 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1280 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1281 1282 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1283 /* TODO remove null space when doing multilevel */ 1284 if (pcbddc->coarse_ksp) { 1285 if (applytranspose) { 1286 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1287 } else { 1288 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1289 } 1290 } 1291 1292 /* Local solution on R nodes */ 1293 if (pcis->n) { 1294 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1295 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1296 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1297 if (pcbddc->switch_static) { 1298 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1299 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1300 } 1301 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1302 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1303 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1304 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1305 if (pcbddc->switch_static) { 1306 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1307 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1308 } 1309 } 1310 1311 /* communications from coarse sol to local primal nodes */ 1312 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1313 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1314 1315 /* Sum contributions from two levels */ 1316 if (applytranspose) { 1317 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1318 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1319 } else { 1320 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1321 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1329 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1330 { 1331 PetscErrorCode ierr; 1332 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1333 PetscScalar *array,*array2; 1334 Vec from,to; 1335 1336 PetscFunctionBegin; 1337 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1338 from = pcbddc->coarse_vec; 1339 to = pcbddc->vec1_P; 1340 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1341 Vec tvec; 1342 PetscInt lsize; 1343 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1344 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1345 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1346 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1347 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1348 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1349 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1350 } 1351 } else { /* from local to global -> put data in coarse right hand side */ 1352 from = pcbddc->vec1_P; 1353 to = pcbddc->coarse_vec; 1354 } 1355 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1361 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1362 { 1363 PetscErrorCode ierr; 1364 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1365 PetscScalar *array,*array2; 1366 Vec from,to; 1367 1368 PetscFunctionBegin; 1369 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1370 from = pcbddc->coarse_vec; 1371 to = pcbddc->vec1_P; 1372 } else { /* from local to global -> put data in coarse right hand side */ 1373 from = pcbddc->vec1_P; 1374 to = pcbddc->coarse_vec; 1375 } 1376 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1377 if (smode == SCATTER_FORWARD) { 1378 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1379 Vec tvec; 1380 PetscInt lsize; 1381 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1382 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1383 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1384 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1385 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1386 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1387 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1388 } 1389 } 1390 PetscFunctionReturn(0); 1391 } 1392 1393 /* uncomment for testing purposes */ 1394 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1397 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1398 { 1399 PetscErrorCode ierr; 1400 PC_IS* pcis = (PC_IS*)(pc->data); 1401 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1402 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1403 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1404 MatType impMatType=MATSEQAIJ; 1405 /* one and zero */ 1406 PetscScalar one=1.0,zero=0.0; 1407 /* space to store constraints and their local indices */ 1408 PetscScalar *temp_quadrature_constraint; 1409 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1410 /* iterators */ 1411 PetscInt i,j,k,total_counts,temp_start_ptr; 1412 /* stuff to store connected components stored in pcbddc->mat_graph */ 1413 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1414 PetscInt n_ISForFaces,n_ISForEdges; 1415 /* near null space stuff */ 1416 MatNullSpace nearnullsp; 1417 const Vec *nearnullvecs; 1418 Vec *localnearnullsp; 1419 PetscBool nnsp_has_cnst; 1420 PetscInt nnsp_size; 1421 PetscScalar *array; 1422 /* BLAS integers */ 1423 PetscBLASInt lwork,lierr; 1424 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1425 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1426 /* LAPACK working arrays for SVD or POD */ 1427 PetscBool skip_lapack; 1428 PetscScalar *work; 1429 PetscReal *singular_vals; 1430 #if defined(PETSC_USE_COMPLEX) 1431 PetscReal *rwork; 1432 #endif 1433 #if defined(PETSC_MISSING_LAPACK_GESVD) 1434 PetscBLASInt Blas_one_2=1; 1435 PetscScalar *temp_basis,*correlation_mat; 1436 #else 1437 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1438 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1439 #endif 1440 /* reuse */ 1441 PetscInt olocal_primal_size; 1442 PetscInt *oprimal_indices_local_idxs; 1443 /* change of basis */ 1444 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1445 PetscBool boolforchange,qr_needed; 1446 PetscBT touched,change_basis,qr_needed_idx; 1447 /* auxiliary stuff */ 1448 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1449 /* some quantities */ 1450 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1451 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1452 1453 1454 PetscFunctionBegin; 1455 /* Destroy Mat objects computed previously */ 1456 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1457 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1458 /* Get index sets for faces, edges and vertices from graph */ 1459 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1460 /* free unneeded index sets */ 1461 if (!pcbddc->use_vertices) { 1462 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1463 } 1464 if (!pcbddc->use_edges) { 1465 for (i=0;i<n_ISForEdges;i++) { 1466 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1467 } 1468 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1469 n_ISForEdges = 0; 1470 } 1471 if (!pcbddc->use_faces) { 1472 for (i=0;i<n_ISForFaces;i++) { 1473 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1474 } 1475 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1476 n_ISForFaces = 0; 1477 } 1478 /* HACKS (the following two blocks of code) */ 1479 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1480 pcbddc->use_change_of_basis = PETSC_TRUE; 1481 if (!ISForEdges) { 1482 pcbddc->use_change_on_faces = PETSC_TRUE; 1483 } 1484 } 1485 if (pcbddc->NullSpace) { 1486 /* use_change_of_basis should be consistent among processors */ 1487 PetscBool tbool[2],gbool[2]; 1488 tbool [0] = pcbddc->use_change_of_basis; 1489 tbool [1] = pcbddc->use_change_on_faces; 1490 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1491 pcbddc->use_change_of_basis = gbool[0]; 1492 pcbddc->use_change_on_faces = gbool[1]; 1493 } 1494 /* print some info */ 1495 if (pcbddc->dbg_flag) { 1496 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1497 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1498 i = 0; 1499 if (ISForVertices) { 1500 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1501 } 1502 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1503 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1506 } 1507 /* check if near null space is attached to global mat */ 1508 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1509 if (nearnullsp) { 1510 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1511 /* remove any stored info */ 1512 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1513 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1514 /* store information for BDDC solver reuse */ 1515 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1516 pcbddc->onearnullspace = nearnullsp; 1517 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1518 for (i=0;i<nnsp_size;i++) { 1519 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1520 } 1521 } else { /* if near null space is not provided BDDC uses constants by default */ 1522 nnsp_size = 0; 1523 nnsp_has_cnst = PETSC_TRUE; 1524 } 1525 /* get max number of constraints on a single cc */ 1526 max_constraints = nnsp_size; 1527 if (nnsp_has_cnst) max_constraints++; 1528 1529 /* 1530 Evaluate maximum storage size needed by the procedure 1531 - temp_indices will contain start index of each constraint stored as follows 1532 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1533 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1534 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1535 */ 1536 total_counts = n_ISForFaces+n_ISForEdges; 1537 total_counts *= max_constraints; 1538 n_vertices = 0; 1539 if (ISForVertices) { 1540 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1541 } 1542 total_counts += n_vertices; 1543 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1544 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1545 total_counts = 0; 1546 max_size_of_constraint = 0; 1547 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1548 if (i<n_ISForEdges) { 1549 used_IS = &ISForEdges[i]; 1550 } else { 1551 used_IS = &ISForFaces[i-n_ISForEdges]; 1552 } 1553 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1554 total_counts += j; 1555 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1556 } 1557 total_counts *= max_constraints; 1558 total_counts += n_vertices; 1559 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1560 /* get local part of global near null space vectors */ 1561 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1562 for (k=0;k<nnsp_size;k++) { 1563 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1564 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1565 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 } 1567 1568 /* whether or not to skip lapack calls */ 1569 skip_lapack = PETSC_TRUE; 1570 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1571 1572 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1573 if (!skip_lapack) { 1574 PetscScalar temp_work; 1575 #if defined(PETSC_MISSING_LAPACK_GESVD) 1576 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1577 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1578 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1579 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1580 #if defined(PETSC_USE_COMPLEX) 1581 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1582 #endif 1583 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1584 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1585 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1586 lwork = -1; 1587 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1588 #if !defined(PETSC_USE_COMPLEX) 1589 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1590 #else 1591 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1592 #endif 1593 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1594 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1595 #else /* on missing GESVD */ 1596 /* SVD */ 1597 PetscInt max_n,min_n; 1598 max_n = max_size_of_constraint; 1599 min_n = max_constraints; 1600 if (max_size_of_constraint < max_constraints) { 1601 min_n = max_size_of_constraint; 1602 max_n = max_constraints; 1603 } 1604 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1605 #if defined(PETSC_USE_COMPLEX) 1606 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1607 #endif 1608 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1609 lwork = -1; 1610 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1611 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1612 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1613 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1614 #if !defined(PETSC_USE_COMPLEX) 1615 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1616 #else 1617 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1618 #endif 1619 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1620 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1621 #endif /* on missing GESVD */ 1622 /* Allocate optimal workspace */ 1623 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1624 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1625 } 1626 /* Now we can loop on constraining sets */ 1627 total_counts = 0; 1628 temp_indices[0] = 0; 1629 /* vertices */ 1630 if (ISForVertices) { 1631 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1632 if (nnsp_has_cnst) { /* consider all vertices */ 1633 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1634 for (i=0;i<n_vertices;i++) { 1635 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1636 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1637 total_counts++; 1638 } 1639 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1640 PetscBool used_vertex; 1641 for (i=0;i<n_vertices;i++) { 1642 used_vertex = PETSC_FALSE; 1643 k = 0; 1644 while (!used_vertex && k<nnsp_size) { 1645 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1646 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1647 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1648 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1649 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1650 total_counts++; 1651 used_vertex = PETSC_TRUE; 1652 } 1653 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1654 k++; 1655 } 1656 } 1657 } 1658 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1659 n_vertices = total_counts; 1660 } 1661 1662 /* edges and faces */ 1663 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1664 if (i<n_ISForEdges) { 1665 used_IS = &ISForEdges[i]; 1666 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1667 } else { 1668 used_IS = &ISForFaces[i-n_ISForEdges]; 1669 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1670 } 1671 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1672 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1673 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1674 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1675 /* change of basis should not be performed on local periodic nodes */ 1676 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1677 if (nnsp_has_cnst) { 1678 PetscScalar quad_value; 1679 temp_constraints++; 1680 if (!pcbddc->use_nnsp_true) { 1681 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1682 } else { 1683 quad_value = 1.0; 1684 } 1685 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1686 for (j=0;j<size_of_constraint;j++) { 1687 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1688 } 1689 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1690 total_counts++; 1691 } 1692 for (k=0;k<nnsp_size;k++) { 1693 PetscReal real_value; 1694 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1695 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1696 for (j=0;j<size_of_constraint;j++) { 1697 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1698 } 1699 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1700 /* check if array is null on the connected component */ 1701 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1702 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1703 if (real_value > 0.0) { /* keep indices and values */ 1704 temp_constraints++; 1705 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1706 total_counts++; 1707 } 1708 } 1709 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1710 valid_constraints = temp_constraints; 1711 if (!pcbddc->use_nnsp_true && temp_constraints) { 1712 if (temp_constraints == 1) { /* just normalize the constraint */ 1713 PetscScalar norm; 1714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1715 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1716 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1717 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1718 } else { /* perform SVD */ 1719 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1720 1721 #if defined(PETSC_MISSING_LAPACK_GESVD) 1722 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1723 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1724 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1725 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1726 from that computed using LAPACKgesvd 1727 -> This is due to a different computation of eigenvectors in LAPACKheev 1728 -> The quality of the POD-computed basis will be the same */ 1729 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1730 /* Store upper triangular part of correlation matrix */ 1731 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1732 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1733 for (j=0;j<temp_constraints;j++) { 1734 for (k=0;k<j+1;k++) { 1735 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1736 } 1737 } 1738 /* compute eigenvalues and eigenvectors of correlation matrix */ 1739 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1740 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1741 #if !defined(PETSC_USE_COMPLEX) 1742 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1743 #else 1744 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1745 #endif 1746 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1747 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1748 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1749 j=0; 1750 while (j < temp_constraints && singular_vals[j] < tol) j++; 1751 total_counts=total_counts-j; 1752 valid_constraints = temp_constraints-j; 1753 /* scale and copy POD basis into used quadrature memory */ 1754 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1755 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1756 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1757 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1758 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1759 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1760 if (j<temp_constraints) { 1761 PetscInt ii; 1762 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1763 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1764 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1765 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1766 for (k=0;k<temp_constraints-j;k++) { 1767 for (ii=0;ii<size_of_constraint;ii++) { 1768 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1769 } 1770 } 1771 } 1772 #else /* on missing GESVD */ 1773 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1774 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1775 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1776 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1777 #if !defined(PETSC_USE_COMPLEX) 1778 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1779 #else 1780 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1781 #endif 1782 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1783 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1784 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1785 k = temp_constraints; 1786 if (k > size_of_constraint) k = size_of_constraint; 1787 j = 0; 1788 while (j < k && singular_vals[k-j-1] < tol) j++; 1789 total_counts = total_counts-temp_constraints+k-j; 1790 valid_constraints = k-j; 1791 #endif /* on missing GESVD */ 1792 } 1793 } 1794 /* setting change_of_basis flag is safe now */ 1795 if (boolforchange) { 1796 for (j=0;j<valid_constraints;j++) { 1797 PetscBTSet(change_basis,total_counts-j-1); 1798 } 1799 } 1800 } 1801 /* free index sets of faces, edges and vertices */ 1802 for (i=0;i<n_ISForFaces;i++) { 1803 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1804 } 1805 if (n_ISForFaces) { 1806 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1807 } 1808 for (i=0;i<n_ISForEdges;i++) { 1809 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1810 } 1811 if (n_ISForEdges) { 1812 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1813 } 1814 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1815 /* map temp_indices_to_constraint in boundary numbering */ 1816 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1817 if (i != temp_indices[total_counts]) { 1818 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1819 } 1820 1821 /* free workspace */ 1822 if (!skip_lapack) { 1823 ierr = PetscFree(work);CHKERRQ(ierr); 1824 #if defined(PETSC_USE_COMPLEX) 1825 ierr = PetscFree(rwork);CHKERRQ(ierr); 1826 #endif 1827 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1828 #if defined(PETSC_MISSING_LAPACK_GESVD) 1829 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1830 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1831 #endif 1832 } 1833 for (k=0;k<nnsp_size;k++) { 1834 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1835 } 1836 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1837 1838 /* set quantities in pcbddc data structure and store previous primal size */ 1839 /* n_vertices defines the number of subdomain corners in the primal space */ 1840 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1841 olocal_primal_size = pcbddc->local_primal_size; 1842 pcbddc->local_primal_size = total_counts; 1843 pcbddc->n_vertices = n_vertices; 1844 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1845 1846 /* Create constraint matrix */ 1847 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1848 /* so we need to set it up properly either with or without change of basis */ 1849 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1850 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1851 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1852 /* array to compute a local numbering of constraints : vertices first then constraints */ 1853 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1854 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1855 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1856 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1857 /* auxiliary stuff for basis change */ 1858 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1859 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1860 1861 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1862 total_primal_vertices=0; 1863 for (i=0;i<pcbddc->local_primal_size;i++) { 1864 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1865 if (size_of_constraint == 1) { 1866 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1867 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1868 aux_primal_minloc[total_primal_vertices]=0; 1869 total_primal_vertices++; 1870 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1871 PetscInt min_loc,min_index; 1872 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1873 /* find first untouched local node */ 1874 k = 0; 1875 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1876 min_index = global_indices[k]; 1877 min_loc = k; 1878 /* search the minimum among global nodes already untouched on the cc */ 1879 for (k=1;k<size_of_constraint;k++) { 1880 /* there can be more than one constraint on a single connected component */ 1881 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1882 min_index = global_indices[k]; 1883 min_loc = k; 1884 } 1885 } 1886 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1887 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1888 aux_primal_minloc[total_primal_vertices]=min_loc; 1889 total_primal_vertices++; 1890 } 1891 } 1892 /* determine if a QR strategy is needed for change of basis */ 1893 qr_needed = PETSC_FALSE; 1894 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1895 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1896 if (PetscBTLookup(change_basis,i)) { 1897 if (!pcbddc->use_qr_single) { 1898 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1899 j = 0; 1900 for (k=0;k<size_of_constraint;k++) { 1901 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1902 j++; 1903 } 1904 } 1905 /* found more than one primal dof on the cc */ 1906 if (j > 1) { 1907 PetscBTSet(qr_needed_idx,i); 1908 qr_needed = PETSC_TRUE; 1909 } 1910 } else { 1911 PetscBTSet(qr_needed_idx,i); 1912 qr_needed = PETSC_TRUE; 1913 } 1914 } 1915 } 1916 /* free workspace */ 1917 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1918 1919 /* permute indices in order to have a sorted set of vertices */ 1920 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1921 1922 /* nonzero structure of constraint matrix */ 1923 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1924 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1925 j=total_primal_vertices; 1926 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1927 if (!PetscBTLookup(change_basis,i)) { 1928 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1929 j++; 1930 } 1931 } 1932 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1933 ierr = PetscFree(nnz);CHKERRQ(ierr); 1934 /* set values in constraint matrix */ 1935 for (i=0;i<total_primal_vertices;i++) { 1936 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1937 } 1938 total_counts = total_primal_vertices; 1939 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1940 if (!PetscBTLookup(change_basis,i)) { 1941 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1942 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1943 total_counts++; 1944 } 1945 } 1946 /* assembling */ 1947 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1948 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1949 /* 1950 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1951 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1952 */ 1953 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1954 if (pcbddc->use_change_of_basis) { 1955 /* dual and primal dofs on a single cc */ 1956 PetscInt dual_dofs,primal_dofs; 1957 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1958 PetscInt primal_counter; 1959 /* working stuff for GEQRF */ 1960 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1961 PetscBLASInt lqr_work; 1962 /* working stuff for UNGQR */ 1963 PetscScalar *gqr_work,lgqr_work_t; 1964 PetscBLASInt lgqr_work; 1965 /* working stuff for TRTRS */ 1966 PetscScalar *trs_rhs; 1967 PetscBLASInt Blas_NRHS; 1968 /* pointers for values insertion into change of basis matrix */ 1969 PetscInt *start_rows,*start_cols; 1970 PetscScalar *start_vals; 1971 /* working stuff for values insertion */ 1972 PetscBT is_primal; 1973 /* matrix sizes */ 1974 PetscInt global_size,local_size; 1975 /* work array for nonzeros */ 1976 PetscScalar *nnz_array; 1977 /* temporary change of basis */ 1978 Mat localChangeOfBasisMatrix; 1979 /* auxiliary work for global change of basis */ 1980 Vec nnz_vec; 1981 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 1982 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 1983 PetscScalar *vals; 1984 PetscBool done; 1985 1986 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 1987 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 1988 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1989 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1990 1991 /* nonzeros for local mat */ 1992 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1993 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1994 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1995 if (PetscBTLookup(change_basis,i)) { 1996 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1997 if (PetscBTLookup(qr_needed_idx,i)) { 1998 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1999 } else { 2000 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 2001 /* get local primal index on the cc */ 2002 j = 0; 2003 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2004 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2005 } 2006 } 2007 } 2008 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2009 /* Set initial identity in the matrix */ 2010 for (i=0;i<pcis->n_B;i++) { 2011 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2012 } 2013 2014 if (pcbddc->dbg_flag) { 2015 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2016 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2017 } 2018 2019 2020 /* Now we loop on the constraints which need a change of basis */ 2021 /* 2022 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2023 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2024 2025 Basic blocks of change of basis matrix T computed by 2026 2027 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2028 2029 | 1 0 ... 0 s_1/S | 2030 | 0 1 ... 0 s_2/S | 2031 | ... | 2032 | 0 ... 1 s_{n-1}/S | 2033 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2034 2035 with S = \sum_{i=1}^n s_i^2 2036 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2037 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2038 2039 - QR decomposition of constraints otherwise 2040 */ 2041 if (qr_needed) { 2042 /* space to store Q */ 2043 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 2044 /* first we issue queries for optimal work */ 2045 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2046 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2047 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2048 lqr_work = -1; 2049 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2050 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2051 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2052 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2053 lgqr_work = -1; 2054 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2055 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2056 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2057 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2058 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2059 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2060 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2061 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2062 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2063 /* array to store scaling factors for reflectors */ 2064 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2065 /* array to store rhs and solution of triangular solver */ 2066 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2067 /* allocating workspace for check */ 2068 if (pcbddc->dbg_flag) { 2069 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2070 } 2071 } 2072 /* array to store whether a node is primal or not */ 2073 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2074 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2075 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2076 if (i != total_primal_vertices) { 2077 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2078 } 2079 for (i=0;i<total_primal_vertices;i++) { 2080 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2081 } 2082 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2083 2084 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2085 /* -> using implicit ordering contained in temp_indices data */ 2086 total_counts = pcbddc->n_vertices; 2087 primal_counter = total_counts; 2088 while (total_counts<pcbddc->local_primal_size) { 2089 primal_dofs = 1; 2090 if (PetscBTLookup(change_basis,total_counts)) { 2091 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2092 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2093 primal_dofs++; 2094 } 2095 /* get constraint info */ 2096 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2097 dual_dofs = size_of_constraint-primal_dofs; 2098 2099 if (pcbddc->dbg_flag) { 2100 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2101 } 2102 2103 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2104 2105 /* copy quadrature constraints for change of basis check */ 2106 if (pcbddc->dbg_flag) { 2107 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2108 } 2109 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2110 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2111 2112 /* compute QR decomposition of constraints */ 2113 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2114 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2115 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2116 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2117 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2118 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2119 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2120 2121 /* explictly compute R^-T */ 2122 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2123 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2124 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2125 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2126 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2127 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2128 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2129 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2130 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2131 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2132 2133 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2134 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2135 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2136 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2137 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2138 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2139 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2140 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2141 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2142 2143 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2144 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2145 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2146 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2147 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2148 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2149 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2150 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2151 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2152 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2153 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2154 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2155 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2156 2157 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2158 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2159 /* insert cols for primal dofs */ 2160 for (j=0;j<primal_dofs;j++) { 2161 start_vals = &qr_basis[j*size_of_constraint]; 2162 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2163 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2164 } 2165 /* insert cols for dual dofs */ 2166 for (j=0,k=0;j<dual_dofs;k++) { 2167 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2168 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2169 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2170 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2171 j++; 2172 } 2173 } 2174 2175 /* check change of basis */ 2176 if (pcbddc->dbg_flag) { 2177 PetscInt ii,jj; 2178 PetscBool valid_qr=PETSC_TRUE; 2179 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2180 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2181 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2182 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2183 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2184 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2185 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2186 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2187 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2188 for (jj=0;jj<size_of_constraint;jj++) { 2189 for (ii=0;ii<primal_dofs;ii++) { 2190 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2191 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2192 } 2193 } 2194 if (!valid_qr) { 2195 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2196 for (jj=0;jj<size_of_constraint;jj++) { 2197 for (ii=0;ii<primal_dofs;ii++) { 2198 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2199 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2200 } 2201 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2202 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2203 } 2204 } 2205 } 2206 } else { 2207 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2208 } 2209 } 2210 } else { /* simple transformation block */ 2211 PetscInt row,col; 2212 PetscScalar val,norm; 2213 2214 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2215 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2216 for (j=0;j<size_of_constraint;j++) { 2217 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2218 if (!PetscBTLookup(is_primal,row)) { 2219 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2220 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2221 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2222 } else { 2223 for (k=0;k<size_of_constraint;k++) { 2224 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2225 if (row != col) { 2226 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2227 } else { 2228 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2229 } 2230 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2231 } 2232 } 2233 } 2234 if (pcbddc->dbg_flag) { 2235 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2236 } 2237 } 2238 /* increment primal counter */ 2239 primal_counter += primal_dofs; 2240 } else { 2241 if (pcbddc->dbg_flag) { 2242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2243 } 2244 } 2245 /* increment constraint counter total_counts */ 2246 total_counts += primal_dofs; 2247 } 2248 2249 /* free workspace */ 2250 if (qr_needed) { 2251 if (pcbddc->dbg_flag) { 2252 ierr = PetscFree(work);CHKERRQ(ierr); 2253 } 2254 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2255 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2256 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2257 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2258 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2259 } 2260 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2261 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2262 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2263 2264 /* assembling of global change of variable */ 2265 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2266 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2267 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2268 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2269 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2270 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2271 2272 /* nonzeros (overestimated) */ 2273 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2274 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2275 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2276 for (i=0;i<pcis->n;i++) { 2277 nnz_array[i] = 1.0; 2278 idxs_all[i] = i; 2279 } 2280 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2281 for (i=0;i<pcis->n_B;i++) { 2282 nnz_array[idxs_B[i]] = nnz[i]; 2283 } 2284 if (pcis->n) { 2285 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2286 } 2287 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2288 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2289 ierr = PetscFree(nnz);CHKERRQ(ierr); 2290 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2291 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2292 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2293 for (i=0;i<local_size;i++) { 2294 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2295 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2296 } 2297 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2298 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2299 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2300 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2301 2302 /* Set identity on dirichlet dofs */ 2303 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2304 for (i=0;i<pcis->n-pcis->n_B;i++) { 2305 PetscScalar one=1.0; 2306 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2307 } 2308 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2309 2310 /* Set values at interface dofs */ 2311 done = PETSC_TRUE; 2312 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2313 if (!done) { 2314 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2315 } 2316 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2317 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2318 ierr = ISLocalToGlobalMappingApply(pcbddc->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2319 for (i=0;i<nvtxs;i++) { 2320 PetscInt row,*cols,ncols; 2321 PetscScalar *mat_vals; 2322 2323 row = idxs_B[i]; 2324 ncols = xadj[i+1]-xadj[i]; 2325 cols = idxs_mapped+xadj[i]; 2326 mat_vals = vals+xadj[i]; 2327 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2328 } 2329 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2330 if (!done) { 2331 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2332 } 2333 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2334 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2335 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2336 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2337 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2338 2339 /* check */ 2340 if (pcbddc->dbg_flag) { 2341 PetscReal error; 2342 Vec x,x_change; 2343 2344 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2345 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2346 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2347 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2348 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2349 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2350 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2351 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2352 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2353 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2354 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2355 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2356 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2357 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2358 ierr = VecDestroy(&x);CHKERRQ(ierr); 2359 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2360 } 2361 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2362 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2363 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2364 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2365 } 2366 2367 /* set up change of basis context */ 2368 if (pcbddc->ChangeOfBasisMatrix) { 2369 PCBDDCChange_ctx change_ctx; 2370 2371 if (!pcbddc->new_global_mat) { 2372 PetscInt global_size,local_size; 2373 2374 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2375 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2376 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2377 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2378 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2379 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2380 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2381 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2382 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2383 } else { 2384 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2385 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2386 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2387 } 2388 if (!pcbddc->user_ChangeOfBasisMatrix) { 2389 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2390 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2391 } else { 2392 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2393 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2394 } 2395 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2396 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2397 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2398 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2399 } 2400 2401 /* get indices in local ordering for vertices and constraints */ 2402 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2403 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2404 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2405 } 2406 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2407 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2408 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2409 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2410 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2411 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2412 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2413 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2414 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2415 /* set quantities in PCBDDC data struct */ 2416 pcbddc->n_actual_vertices = i; 2417 /* check if a new primal space has been introduced */ 2418 pcbddc->new_primal_space_local = PETSC_TRUE; 2419 if (olocal_primal_size == pcbddc->local_primal_size) { 2420 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2421 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2422 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2423 } 2424 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2425 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2426 2427 /* flush dbg viewer */ 2428 if (pcbddc->dbg_flag) { 2429 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2430 } 2431 2432 /* free workspace */ 2433 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2434 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2435 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2436 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2437 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2438 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2439 PetscFunctionReturn(0); 2440 } 2441 2442 #undef __FUNCT__ 2443 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2444 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2445 { 2446 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2447 PC_IS *pcis = (PC_IS*)pc->data; 2448 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2449 PetscInt ierr,i,vertex_size; 2450 PetscViewer viewer=pcbddc->dbg_viewer; 2451 2452 PetscFunctionBegin; 2453 /* Reset previously computed graph */ 2454 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2455 /* Init local Graph struct */ 2456 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2457 2458 /* Check validity of the csr graph passed in by the user */ 2459 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2460 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2461 } 2462 2463 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2464 if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) { 2465 Mat mat_adj; 2466 const PetscInt *xadj,*adjncy; 2467 PetscBool flg_row=PETSC_TRUE; 2468 2469 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2470 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2471 if (!flg_row) { 2472 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2473 } 2474 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2475 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2476 if (!flg_row) { 2477 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2478 } 2479 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2480 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2481 } 2482 2483 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2484 vertex_size = 1; 2485 if (pcbddc->user_provided_isfordofs) { 2486 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2487 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2488 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2489 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2490 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2491 } 2492 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2493 pcbddc->n_ISForDofs = 0; 2494 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2495 } 2496 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2497 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2498 } else { 2499 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2500 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2501 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2502 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2503 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2504 } 2505 } 2506 } 2507 2508 /* Setup of Graph */ 2509 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2510 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2511 } 2512 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2513 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2514 } 2515 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2516 2517 /* Graph's connected components analysis */ 2518 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2519 2520 /* print some info to stdout */ 2521 if (pcbddc->dbg_flag) { 2522 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2523 } 2524 2525 /* mark topography has done */ 2526 pcbddc->recompute_topography = PETSC_FALSE; 2527 PetscFunctionReturn(0); 2528 } 2529 2530 #undef __FUNCT__ 2531 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2532 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2533 { 2534 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2535 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2536 PetscErrorCode ierr; 2537 2538 PetscFunctionBegin; 2539 n = 0; 2540 vertices = 0; 2541 if (pcbddc->ConstraintMatrix) { 2542 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2543 for (i=0;i<local_primal_size;i++) { 2544 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2545 if (size_of_constraint == 1) n++; 2546 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2547 } 2548 if (vertices_idx) { 2549 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2550 n = 0; 2551 for (i=0;i<local_primal_size;i++) { 2552 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2553 if (size_of_constraint == 1) { 2554 vertices[n++]=row_cmat_indices[0]; 2555 } 2556 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2557 } 2558 } 2559 } 2560 *n_vertices = n; 2561 if (vertices_idx) *vertices_idx = vertices; 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2567 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2568 { 2569 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2570 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2571 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2572 PetscBT touched; 2573 PetscErrorCode ierr; 2574 2575 /* This function assumes that the number of local constraints per connected component 2576 is not greater than the number of nodes defined for the connected component 2577 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2578 PetscFunctionBegin; 2579 n = 0; 2580 constraints_index = 0; 2581 if (pcbddc->ConstraintMatrix) { 2582 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2583 max_size_of_constraint = 0; 2584 for (i=0;i<local_primal_size;i++) { 2585 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2586 if (size_of_constraint > 1) { 2587 n++; 2588 } 2589 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2590 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2591 } 2592 if (constraints_idx) { 2593 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2594 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2595 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2596 n = 0; 2597 for (i=0;i<local_primal_size;i++) { 2598 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2599 if (size_of_constraint > 1) { 2600 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2601 /* find first untouched local node */ 2602 j = 0; 2603 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2604 min_index = row_cmat_global_indices[j]; 2605 min_loc = j; 2606 /* search the minimum among nodes not yet touched on the connected component 2607 since there can be more than one constraint on a single cc */ 2608 for (j=1;j<size_of_constraint;j++) { 2609 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2610 min_index = row_cmat_global_indices[j]; 2611 min_loc = j; 2612 } 2613 } 2614 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2615 constraints_index[n++] = row_cmat_indices[min_loc]; 2616 } 2617 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2618 } 2619 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2620 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2621 } 2622 } 2623 *n_constraints = n; 2624 if (constraints_idx) *constraints_idx = constraints_index; 2625 PetscFunctionReturn(0); 2626 } 2627 2628 #undef __FUNCT__ 2629 #define __FUNCT__ "PCBDDCSubsetNumbering" 2630 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2631 { 2632 Vec local_vec,global_vec; 2633 IS seqis,paris; 2634 VecScatter scatter_ctx; 2635 PetscScalar *array; 2636 PetscInt *temp_global_dofs; 2637 PetscScalar globalsum; 2638 PetscInt i,j,s; 2639 PetscInt nlocals,first_index,old_index,max_local; 2640 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2641 PetscMPIInt *dof_sizes,*dof_displs; 2642 PetscBool first_found; 2643 PetscErrorCode ierr; 2644 2645 PetscFunctionBegin; 2646 /* mpi buffers */ 2647 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2648 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2649 j = ( !rank_prec_comm ? size_prec_comm : 0); 2650 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2651 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2652 /* get maximum size of subset */ 2653 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2654 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2655 max_local = 0; 2656 for (i=0;i<n_local_dofs;i++) { 2657 if (max_local < temp_global_dofs[i] ) { 2658 max_local = temp_global_dofs[i]; 2659 } 2660 } 2661 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2662 max_global++; 2663 max_local = 0; 2664 for (i=0;i<n_local_dofs;i++) { 2665 if (max_local < local_dofs[i] ) { 2666 max_local = local_dofs[i]; 2667 } 2668 } 2669 max_local++; 2670 /* allocate workspace */ 2671 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2672 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2673 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2674 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2675 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2676 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2677 /* create scatter */ 2678 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2679 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2680 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2681 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2682 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2683 /* init array */ 2684 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2685 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2686 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2687 if (local_dofs_mult) { 2688 for (i=0;i<n_local_dofs;i++) { 2689 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2690 } 2691 } else { 2692 for (i=0;i<n_local_dofs;i++) { 2693 array[local_dofs[i]]=1.0; 2694 } 2695 } 2696 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2697 /* scatter into global vec and get total number of global dofs */ 2698 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2699 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2700 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2701 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2702 /* Fill global_vec with cumulative function for global numbering */ 2703 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2704 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2705 nlocals = 0; 2706 first_index = -1; 2707 first_found = PETSC_FALSE; 2708 for (i=0;i<s;i++) { 2709 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2710 first_found = PETSC_TRUE; 2711 first_index = i; 2712 } 2713 nlocals += (PetscInt)PetscRealPart(array[i]); 2714 } 2715 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2716 if (!rank_prec_comm) { 2717 dof_displs[0]=0; 2718 for (i=1;i<size_prec_comm;i++) { 2719 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2720 } 2721 } 2722 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2723 if (first_found) { 2724 array[first_index] += (PetscScalar)nlocals; 2725 old_index = first_index; 2726 for (i=first_index+1;i<s;i++) { 2727 if (PetscRealPart(array[i]) > 0.1) { 2728 array[i] += array[old_index]; 2729 old_index = i; 2730 } 2731 } 2732 } 2733 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2734 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2735 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2736 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2737 /* get global ordering of local dofs */ 2738 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2739 if (local_dofs_mult) { 2740 for (i=0;i<n_local_dofs;i++) { 2741 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2742 } 2743 } else { 2744 for (i=0;i<n_local_dofs;i++) { 2745 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2746 } 2747 } 2748 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2749 /* free workspace */ 2750 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2751 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2752 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2753 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2754 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2755 /* return pointer to global ordering of local dofs */ 2756 *global_numbering_subset = temp_global_dofs; 2757 PetscFunctionReturn(0); 2758 } 2759 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2762 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2763 { 2764 PetscInt i,j; 2765 PetscScalar *alphas; 2766 PetscErrorCode ierr; 2767 2768 PetscFunctionBegin; 2769 /* this implements stabilized Gram-Schmidt */ 2770 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2771 for (i=0;i<n;i++) { 2772 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2773 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2774 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2775 } 2776 ierr = PetscFree(alphas);CHKERRQ(ierr); 2777 PetscFunctionReturn(0); 2778 } 2779 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "MatISGetSubassemblingPattern" 2782 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2783 { 2784 Mat subdomain_adj; 2785 IS new_ranks,ranks_send_to; 2786 MatPartitioning partitioner; 2787 Mat_IS *matis; 2788 PetscInt n_neighs,*neighs,*n_shared,**shared; 2789 PetscInt prank; 2790 PetscMPIInt size,rank,color; 2791 PetscInt *xadj,*adjncy,*oldranks; 2792 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2793 PetscInt i,local_size,threshold=0; 2794 PetscErrorCode ierr; 2795 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2796 PetscSubcomm subcomm; 2797 2798 PetscFunctionBegin; 2799 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2800 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2801 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2802 2803 /* Get info on mapping */ 2804 matis = (Mat_IS*)(mat->data); 2805 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2806 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2807 2808 /* build local CSR graph of subdomains' connectivity */ 2809 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2810 xadj[0] = 0; 2811 xadj[1] = PetscMax(n_neighs-1,0); 2812 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2813 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2814 2815 if (threshold) { 2816 PetscInt xadj_count = 0; 2817 for (i=1;i<n_neighs;i++) { 2818 if (n_shared[i] > threshold) { 2819 adjncy[xadj_count] = neighs[i]; 2820 adjncy_wgt[xadj_count] = n_shared[i]; 2821 xadj_count++; 2822 } 2823 } 2824 xadj[1] = xadj_count; 2825 } else { 2826 if (xadj[1]) { 2827 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2828 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2829 } 2830 } 2831 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2832 if (use_square) { 2833 for (i=0;i<xadj[1];i++) { 2834 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2835 } 2836 } 2837 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2838 2839 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 2840 2841 /* 2842 Restrict work on active processes only. 2843 */ 2844 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2845 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2846 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2847 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2848 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2849 if (color) { 2850 ierr = PetscFree(xadj);CHKERRQ(ierr); 2851 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2852 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2853 } else { 2854 PetscInt coarsening_ratio; 2855 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2856 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2857 prank = rank; 2858 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2859 /* 2860 for (i=0;i<size;i++) { 2861 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2862 } 2863 */ 2864 for (i=0;i<xadj[1];i++) { 2865 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2866 } 2867 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2868 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2869 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2870 2871 /* Partition */ 2872 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2873 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2874 if (use_vwgt) { 2875 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 2876 v_wgt[0] = local_size; 2877 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2878 } 2879 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2880 coarsening_ratio = size/n_subdomains; 2881 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2882 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2883 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2884 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2885 2886 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2887 if (contiguous) { 2888 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2889 } else { 2890 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2891 } 2892 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2893 /* clean up */ 2894 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2895 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2896 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2897 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2898 } 2899 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2900 2901 /* assemble parallel IS for sends */ 2902 i = 1; 2903 if (color) i=0; 2904 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2905 2906 /* get back IS */ 2907 *is_sends = ranks_send_to; 2908 PetscFunctionReturn(0); 2909 } 2910 2911 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2912 2913 #undef __FUNCT__ 2914 #define __FUNCT__ "MatISSubassemble" 2915 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2916 { 2917 Mat local_mat; 2918 Mat_IS *matis; 2919 IS is_sends_internal; 2920 PetscInt rows,cols,new_local_rows; 2921 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2922 PetscBool ismatis,isdense,newisdense,destroy_mat; 2923 ISLocalToGlobalMapping l2gmap; 2924 PetscInt* l2gmap_indices; 2925 const PetscInt* is_indices; 2926 MatType new_local_type; 2927 /* buffers */ 2928 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2929 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2930 PetscInt *recv_buffer_idxs_local; 2931 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2932 /* MPI */ 2933 MPI_Comm comm,comm_n; 2934 PetscSubcomm subcomm; 2935 PetscMPIInt n_sends,n_recvs,commsize; 2936 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2937 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2938 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2939 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2940 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2941 PetscErrorCode ierr; 2942 2943 PetscFunctionBegin; 2944 /* TODO: add missing checks */ 2945 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2946 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2947 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2948 PetscValidLogicalCollectiveInt(mat,nis,7); 2949 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2950 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2951 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2952 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2953 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2954 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2955 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2956 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2957 PetscInt mrows,mcols,mnrows,mncols; 2958 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2959 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2960 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2961 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2962 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2963 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2964 } 2965 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2966 PetscValidLogicalCollectiveInt(mat,bs,0); 2967 /* prepare IS for sending if not provided */ 2968 if (!is_sends) { 2969 PetscBool pcontig = PETSC_TRUE; 2970 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2971 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2972 } else { 2973 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2974 is_sends_internal = is_sends; 2975 } 2976 2977 /* get pointer of MATIS data */ 2978 matis = (Mat_IS*)mat->data; 2979 2980 /* get comm */ 2981 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2982 2983 /* compute number of sends */ 2984 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2985 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2986 2987 /* compute number of receives */ 2988 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2989 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 2990 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2991 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2992 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2993 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2994 ierr = PetscFree(iflags);CHKERRQ(ierr); 2995 2996 /* restrict comm if requested */ 2997 subcomm = 0; 2998 destroy_mat = PETSC_FALSE; 2999 if (restrict_comm) { 3000 PetscMPIInt color,rank,subcommsize; 3001 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3002 color = 0; 3003 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3004 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3005 subcommsize = commsize - subcommsize; 3006 /* check if reuse has been requested */ 3007 if (reuse == MAT_REUSE_MATRIX) { 3008 if (*mat_n) { 3009 PetscMPIInt subcommsize2; 3010 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3011 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3012 comm_n = PetscObjectComm((PetscObject)*mat_n); 3013 } else { 3014 comm_n = PETSC_COMM_SELF; 3015 } 3016 } else { /* MAT_INITIAL_MATRIX */ 3017 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3018 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3019 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3020 comm_n = subcomm->comm; 3021 } 3022 /* flag to destroy *mat_n if not significative */ 3023 if (color) destroy_mat = PETSC_TRUE; 3024 } else { 3025 comm_n = comm; 3026 } 3027 3028 /* prepare send/receive buffers */ 3029 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3030 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3031 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3032 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3033 if (nis) { 3034 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3035 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3036 } 3037 3038 /* Get data from local matrices */ 3039 if (!isdense) { 3040 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3041 /* TODO: See below some guidelines on how to prepare the local buffers */ 3042 /* 3043 send_buffer_vals should contain the raw values of the local matrix 3044 send_buffer_idxs should contain: 3045 - MatType_PRIVATE type 3046 - PetscInt size_of_l2gmap 3047 - PetscInt global_row_indices[size_of_l2gmap] 3048 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3049 */ 3050 } else { 3051 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3052 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3053 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 3054 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3055 send_buffer_idxs[1] = i; 3056 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3057 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3058 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3059 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3060 for (i=0;i<n_sends;i++) { 3061 ilengths_vals[is_indices[i]] = len*len; 3062 ilengths_idxs[is_indices[i]] = len+2; 3063 } 3064 } 3065 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3066 /* additional is (if any) */ 3067 if (nis) { 3068 PetscMPIInt psum; 3069 PetscInt j; 3070 for (j=0,psum=0;j<nis;j++) { 3071 PetscInt plen; 3072 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3073 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3074 psum += len+1; /* indices + lenght */ 3075 } 3076 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3077 for (j=0,psum=0;j<nis;j++) { 3078 PetscInt plen; 3079 const PetscInt *is_array_idxs; 3080 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3081 send_buffer_idxs_is[psum] = plen; 3082 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3083 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3084 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3085 psum += plen+1; /* indices + lenght */ 3086 } 3087 for (i=0;i<n_sends;i++) { 3088 ilengths_idxs_is[is_indices[i]] = psum; 3089 } 3090 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3091 } 3092 3093 buf_size_idxs = 0; 3094 buf_size_vals = 0; 3095 buf_size_idxs_is = 0; 3096 for (i=0;i<n_recvs;i++) { 3097 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3098 buf_size_vals += (PetscInt)olengths_vals[i]; 3099 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3100 } 3101 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3102 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3103 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3104 3105 /* get new tags for clean communications */ 3106 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3107 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3108 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3109 3110 /* allocate for requests */ 3111 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3112 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3113 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3114 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3115 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3116 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3117 3118 /* communications */ 3119 ptr_idxs = recv_buffer_idxs; 3120 ptr_vals = recv_buffer_vals; 3121 ptr_idxs_is = recv_buffer_idxs_is; 3122 for (i=0;i<n_recvs;i++) { 3123 source_dest = onodes[i]; 3124 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3125 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3126 ptr_idxs += olengths_idxs[i]; 3127 ptr_vals += olengths_vals[i]; 3128 if (nis) { 3129 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3130 ptr_idxs_is += olengths_idxs_is[i]; 3131 } 3132 } 3133 for (i=0;i<n_sends;i++) { 3134 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3135 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3136 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3137 if (nis) { 3138 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3139 } 3140 } 3141 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3142 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3143 3144 /* assemble new l2g map */ 3145 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3146 ptr_idxs = recv_buffer_idxs; 3147 new_local_rows = 0; 3148 for (i=0;i<n_recvs;i++) { 3149 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3150 ptr_idxs += olengths_idxs[i]; 3151 } 3152 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3153 ptr_idxs = recv_buffer_idxs; 3154 new_local_rows = 0; 3155 for (i=0;i<n_recvs;i++) { 3156 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3157 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3158 ptr_idxs += olengths_idxs[i]; 3159 } 3160 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3161 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3162 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3163 3164 /* infer new local matrix type from received local matrices type */ 3165 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3166 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3167 if (n_recvs) { 3168 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3169 ptr_idxs = recv_buffer_idxs; 3170 for (i=0;i<n_recvs;i++) { 3171 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3172 new_local_type_private = MATAIJ_PRIVATE; 3173 break; 3174 } 3175 ptr_idxs += olengths_idxs[i]; 3176 } 3177 switch (new_local_type_private) { 3178 case MATDENSE_PRIVATE: 3179 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3180 new_local_type = MATSEQAIJ; 3181 bs = 1; 3182 } else { /* if I receive only 1 dense matrix */ 3183 new_local_type = MATSEQDENSE; 3184 bs = 1; 3185 } 3186 break; 3187 case MATAIJ_PRIVATE: 3188 new_local_type = MATSEQAIJ; 3189 bs = 1; 3190 break; 3191 case MATBAIJ_PRIVATE: 3192 new_local_type = MATSEQBAIJ; 3193 break; 3194 case MATSBAIJ_PRIVATE: 3195 new_local_type = MATSEQSBAIJ; 3196 break; 3197 default: 3198 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3199 break; 3200 } 3201 } else { /* by default, new_local_type is seqdense */ 3202 new_local_type = MATSEQDENSE; 3203 bs = 1; 3204 } 3205 3206 /* create MATIS object if needed */ 3207 if (reuse == MAT_INITIAL_MATRIX) { 3208 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3209 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3210 } else { 3211 /* it also destroys the local matrices */ 3212 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3213 } 3214 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3215 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3216 3217 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3218 3219 /* Global to local map of received indices */ 3220 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3221 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3222 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3223 3224 /* restore attributes -> type of incoming data and its size */ 3225 buf_size_idxs = 0; 3226 for (i=0;i<n_recvs;i++) { 3227 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3228 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3229 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3230 } 3231 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3232 3233 /* set preallocation */ 3234 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3235 if (!newisdense) { 3236 PetscInt *new_local_nnz=0; 3237 3238 ptr_vals = recv_buffer_vals; 3239 ptr_idxs = recv_buffer_idxs_local; 3240 if (n_recvs) { 3241 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3242 } 3243 for (i=0;i<n_recvs;i++) { 3244 PetscInt j; 3245 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3246 for (j=0;j<*(ptr_idxs+1);j++) { 3247 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3248 } 3249 } else { 3250 /* TODO */ 3251 } 3252 ptr_idxs += olengths_idxs[i]; 3253 } 3254 if (new_local_nnz) { 3255 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3256 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3257 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3258 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3259 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3260 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3261 } else { 3262 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3263 } 3264 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3265 } else { 3266 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3267 } 3268 3269 /* set values */ 3270 ptr_vals = recv_buffer_vals; 3271 ptr_idxs = recv_buffer_idxs_local; 3272 for (i=0;i<n_recvs;i++) { 3273 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3274 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3275 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3276 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3277 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3278 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3279 } else { 3280 /* TODO */ 3281 } 3282 ptr_idxs += olengths_idxs[i]; 3283 ptr_vals += olengths_vals[i]; 3284 } 3285 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3286 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3287 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3288 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3289 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3290 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3291 3292 #if 0 3293 if (!restrict_comm) { /* check */ 3294 Vec lvec,rvec; 3295 PetscReal infty_error; 3296 3297 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3298 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3299 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3300 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3301 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3302 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3303 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3304 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3305 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3306 } 3307 #endif 3308 3309 /* assemble new additional is (if any) */ 3310 if (nis) { 3311 PetscInt **temp_idxs,*count_is,j,psum; 3312 3313 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3314 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3315 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3316 ptr_idxs = recv_buffer_idxs_is; 3317 psum = 0; 3318 for (i=0;i<n_recvs;i++) { 3319 for (j=0;j<nis;j++) { 3320 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3321 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3322 psum += plen; 3323 ptr_idxs += plen+1; /* shift pointer to received data */ 3324 } 3325 } 3326 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3327 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3328 for (i=1;i<nis;i++) { 3329 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3330 } 3331 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3332 ptr_idxs = recv_buffer_idxs_is; 3333 for (i=0;i<n_recvs;i++) { 3334 for (j=0;j<nis;j++) { 3335 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3336 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3337 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3338 ptr_idxs += plen+1; /* shift pointer to received data */ 3339 } 3340 } 3341 for (i=0;i<nis;i++) { 3342 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3343 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3344 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3345 } 3346 ierr = PetscFree(count_is);CHKERRQ(ierr); 3347 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3348 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3349 } 3350 /* free workspace */ 3351 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3352 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3353 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3354 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3355 if (isdense) { 3356 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3357 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3358 } else { 3359 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3360 } 3361 if (nis) { 3362 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3363 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3364 } 3365 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3366 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3367 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3368 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3369 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3370 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3371 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3372 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3373 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3374 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3375 ierr = PetscFree(onodes);CHKERRQ(ierr); 3376 if (nis) { 3377 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3378 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3379 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3380 } 3381 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3382 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3383 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3384 for (i=0;i<nis;i++) { 3385 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3386 } 3387 } 3388 PetscFunctionReturn(0); 3389 } 3390 3391 /* temporary hack into ksp private data structure */ 3392 #include <petsc-private/kspimpl.h> 3393 3394 #undef __FUNCT__ 3395 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3396 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3397 { 3398 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3399 PC_IS *pcis = (PC_IS*)pc->data; 3400 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3401 MatNullSpace CoarseNullSpace=NULL; 3402 ISLocalToGlobalMapping coarse_islg; 3403 IS coarse_is,*isarray; 3404 PetscInt i,im_active=-1,active_procs=-1; 3405 PetscInt nis,nisdofs,nisneu; 3406 PC pc_temp; 3407 PCType coarse_pc_type; 3408 KSPType coarse_ksp_type; 3409 PetscBool multilevel_requested,multilevel_allowed; 3410 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3411 Mat t_coarse_mat_is; 3412 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3413 PetscMPIInt all_procs; 3414 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3415 PetscBool compute_vecs = PETSC_FALSE; 3416 PetscScalar *array; 3417 PetscErrorCode ierr; 3418 3419 PetscFunctionBegin; 3420 /* Assign global numbering to coarse dofs */ 3421 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3422 compute_vecs = PETSC_TRUE; 3423 PetscInt ocoarse_size; 3424 ocoarse_size = pcbddc->coarse_size; 3425 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3426 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3427 /* see if we can avoid some work */ 3428 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3429 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3430 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3431 coarse_reuse = PETSC_FALSE; 3432 } else { /* we can safely reuse already computed coarse matrix */ 3433 coarse_reuse = PETSC_TRUE; 3434 } 3435 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3436 coarse_reuse = PETSC_FALSE; 3437 } 3438 /* reset any subassembling information */ 3439 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3440 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3441 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3442 coarse_reuse = PETSC_TRUE; 3443 } 3444 3445 /* count "active" (i.e. with positive local size) and "void" processes */ 3446 im_active = !!(pcis->n); 3447 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3448 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3449 void_procs = all_procs-active_procs; 3450 csin_type_simple = PETSC_TRUE; 3451 redist = PETSC_FALSE; 3452 if (pcbddc->current_level && void_procs) { 3453 csin_ml = PETSC_TRUE; 3454 ncoarse_ml = void_procs; 3455 csin_ds = PETSC_TRUE; 3456 ncoarse_ds = void_procs; 3457 } else { 3458 csin_ml = PETSC_FALSE; 3459 ncoarse_ml = all_procs; 3460 if (void_procs) { 3461 csin_ds = PETSC_TRUE; 3462 ncoarse_ds = void_procs; 3463 csin_type_simple = PETSC_FALSE; 3464 } else { 3465 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3466 csin_ds = PETSC_TRUE; 3467 ncoarse_ds = pcbddc->redistribute_coarse; 3468 redist = PETSC_TRUE; 3469 } else { 3470 csin_ds = PETSC_FALSE; 3471 ncoarse_ds = all_procs; 3472 } 3473 } 3474 } 3475 3476 /* 3477 test if we can go multilevel: three conditions must be satisfied: 3478 - we have not exceeded the number of levels requested 3479 - we can actually subassemble the active processes 3480 - we can find a suitable number of MPI processes where we can place the subassembled problem 3481 */ 3482 multilevel_allowed = PETSC_FALSE; 3483 multilevel_requested = PETSC_FALSE; 3484 if (pcbddc->current_level < pcbddc->max_levels) { 3485 multilevel_requested = PETSC_TRUE; 3486 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3487 multilevel_allowed = PETSC_FALSE; 3488 } else { 3489 multilevel_allowed = PETSC_TRUE; 3490 } 3491 } 3492 /* determine number of process partecipating to coarse solver */ 3493 if (multilevel_allowed) { 3494 ncoarse = ncoarse_ml; 3495 csin = csin_ml; 3496 } else { 3497 ncoarse = ncoarse_ds; 3498 csin = csin_ds; 3499 } 3500 3501 /* creates temporary l2gmap and IS for coarse indexes */ 3502 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3503 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3504 3505 /* creates temporary MATIS object for coarse matrix */ 3506 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3507 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3508 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3509 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3510 #if 0 3511 { 3512 PetscViewer viewer; 3513 char filename[256]; 3514 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3515 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3516 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3517 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3518 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3519 } 3520 #endif 3521 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3522 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3523 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3524 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3525 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3526 3527 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3528 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3529 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3530 const PetscInt *idxs; 3531 ISLocalToGlobalMapping tmap; 3532 3533 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3534 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3535 /* allocate space for temporary storage */ 3536 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3537 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3538 /* allocate for IS array */ 3539 nisdofs = pcbddc->n_ISForDofsLocal; 3540 nisneu = !!pcbddc->NeumannBoundariesLocal; 3541 nis = nisdofs + nisneu; 3542 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3543 /* dofs splitting */ 3544 for (i=0;i<nisdofs;i++) { 3545 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3546 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3547 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3548 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3549 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3550 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3551 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3552 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3553 } 3554 /* neumann boundaries */ 3555 if (pcbddc->NeumannBoundariesLocal) { 3556 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3557 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3558 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3559 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3560 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3561 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3562 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3563 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3564 } 3565 /* free memory */ 3566 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3567 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3568 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3569 } else { 3570 nis = 0; 3571 nisdofs = 0; 3572 nisneu = 0; 3573 isarray = NULL; 3574 } 3575 /* destroy no longer needed map */ 3576 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3577 3578 /* restrict on coarse candidates (if needed) */ 3579 coarse_mat_is = NULL; 3580 if (csin) { 3581 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3582 if (redist) { 3583 PetscMPIInt rank; 3584 PetscInt spc,n_spc_p1,dest[1]; 3585 3586 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3587 spc = all_procs/pcbddc->redistribute_coarse; 3588 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 3589 if (rank > n_spc_p1*(spc+1)-1) { 3590 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3591 } else { 3592 dest[0] = rank/(spc+1); 3593 } 3594 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3595 ierr = ISView(pcbddc->coarse_subassembling_init,NULL);CHKERRQ(ierr); 3596 } else { 3597 PetscInt j,tissize,*nisindices; 3598 PetscInt *coarse_candidates; 3599 const PetscInt* tisindices; 3600 /* get coarse candidates' ranks in pc communicator */ 3601 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3602 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3603 for (i=0,j=0;i<all_procs;i++) { 3604 if (!coarse_candidates[i]) { 3605 coarse_candidates[j]=i; 3606 j++; 3607 } 3608 } 3609 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3610 /* get a suitable subassembling pattern */ 3611 if (csin_type_simple) { 3612 PetscMPIInt rank; 3613 PetscInt issize,isidx; 3614 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3615 if (im_active) { 3616 issize = 1; 3617 isidx = (PetscInt)rank; 3618 } else { 3619 issize = 0; 3620 isidx = -1; 3621 } 3622 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3623 } else { 3624 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3625 } 3626 if (pcbddc->dbg_flag) { 3627 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3628 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3629 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3630 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3631 for (i=0;i<j;i++) { 3632 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3633 } 3634 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3635 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3636 } 3637 /* shift the pattern on coarse candidates */ 3638 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3639 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3640 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3641 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3642 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3643 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3644 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3645 } 3646 } 3647 if (pcbddc->dbg_flag) { 3648 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3649 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3650 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3651 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3652 } 3653 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3654 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3655 } else { 3656 if (pcbddc->dbg_flag) { 3657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3660 } 3661 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3662 coarse_mat_is = t_coarse_mat_is; 3663 } 3664 3665 /* create local to global scatters for coarse problem */ 3666 if (compute_vecs) { 3667 PetscInt lrows; 3668 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3669 if (coarse_mat_is) { 3670 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3671 } else { 3672 lrows = 0; 3673 } 3674 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3675 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3676 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3677 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3678 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3679 } 3680 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3681 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3682 3683 /* set defaults for coarse KSP and PC */ 3684 if (multilevel_allowed) { 3685 coarse_ksp_type = KSPRICHARDSON; 3686 coarse_pc_type = PCBDDC; 3687 } else { 3688 coarse_ksp_type = KSPPREONLY; 3689 coarse_pc_type = PCREDUNDANT; 3690 } 3691 3692 /* print some info if requested */ 3693 if (pcbddc->dbg_flag) { 3694 if (!multilevel_allowed) { 3695 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3696 if (multilevel_requested) { 3697 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3698 } else if (pcbddc->max_levels) { 3699 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3700 } 3701 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3702 } 3703 } 3704 3705 /* create the coarse KSP object only once with defaults */ 3706 if (coarse_mat_is) { 3707 MatReuse coarse_mat_reuse; 3708 PetscViewer dbg_viewer = NULL; 3709 if (pcbddc->dbg_flag) { 3710 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3711 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3712 } 3713 if (!pcbddc->coarse_ksp) { 3714 char prefix[256],str_level[16]; 3715 size_t len; 3716 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3717 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3718 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3719 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3720 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3721 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3722 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3723 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3724 /* prefix */ 3725 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3726 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3727 if (!pcbddc->current_level) { 3728 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3729 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3730 } else { 3731 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3732 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3733 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3734 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3735 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3736 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3737 } 3738 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3739 /* allow user customization */ 3740 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3741 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3742 } 3743 3744 /* get some info after set from options */ 3745 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3746 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3747 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3748 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3749 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3750 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3751 isbddc = PETSC_FALSE; 3752 } 3753 if (isredundant) { 3754 KSP inner_ksp; 3755 PC inner_pc; 3756 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3757 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3758 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3759 } 3760 3761 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3762 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3763 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3764 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3765 if (nisdofs) { 3766 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3767 for (i=0;i<nisdofs;i++) { 3768 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3769 } 3770 } 3771 if (nisneu) { 3772 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3773 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3774 } 3775 3776 /* assemble coarse matrix */ 3777 if (coarse_reuse) { 3778 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3779 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3780 coarse_mat_reuse = MAT_REUSE_MATRIX; 3781 } else { 3782 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3783 } 3784 if (isbddc || isnn) { 3785 if (pcbddc->coarsening_ratio > 1) { 3786 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3787 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3788 if (pcbddc->dbg_flag) { 3789 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3790 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3791 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3792 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3793 } 3794 } 3795 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3796 } else { 3797 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 3798 coarse_mat = coarse_mat_is; 3799 } 3800 } else { 3801 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3802 } 3803 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3804 3805 /* propagate symmetry info to coarse matrix */ 3806 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3807 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3808 3809 /* set operators */ 3810 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3811 if (pcbddc->dbg_flag) { 3812 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3813 } 3814 } else { /* processes non partecipating to coarse solver (if any) */ 3815 coarse_mat = 0; 3816 } 3817 ierr = PetscFree(isarray);CHKERRQ(ierr); 3818 #if 0 3819 { 3820 PetscViewer viewer; 3821 char filename[256]; 3822 sprintf(filename,"coarse_mat.m"); 3823 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3824 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3825 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3826 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3827 } 3828 #endif 3829 3830 /* Compute coarse null space (special handling by BDDC only) */ 3831 if (pcbddc->NullSpace) { 3832 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3833 } 3834 3835 if (pcbddc->coarse_ksp) { 3836 Vec crhs,csol; 3837 PetscBool ispreonly; 3838 if (CoarseNullSpace) { 3839 if (isbddc) { 3840 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3841 } else { 3842 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3843 } 3844 } 3845 /* setup coarse ksp */ 3846 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3847 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3848 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3849 /* hack */ 3850 if (!csol) { 3851 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3852 } 3853 if (!crhs) { 3854 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3855 } 3856 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3857 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3858 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3859 KSP check_ksp; 3860 KSPType check_ksp_type; 3861 PC check_pc; 3862 Vec check_vec,coarse_vec; 3863 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 3864 PetscInt its; 3865 PetscBool compute_eigs; 3866 PetscReal *eigs_r,*eigs_c; 3867 PetscInt neigs; 3868 const char *prefix; 3869 3870 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3871 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3872 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3873 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3874 if (ispreonly) { 3875 check_ksp_type = KSPPREONLY; 3876 compute_eigs = PETSC_FALSE; 3877 } else { 3878 check_ksp_type = KSPGMRES; 3879 compute_eigs = PETSC_TRUE; 3880 } 3881 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3882 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3883 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3884 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3885 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 3886 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 3887 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 3888 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 3889 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3890 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3891 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3892 /* create random vec */ 3893 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3894 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3895 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3896 if (CoarseNullSpace) { 3897 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3898 } 3899 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3900 /* solve coarse problem */ 3901 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3902 if (CoarseNullSpace) { 3903 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3904 } 3905 /* set eigenvalue estimation if preonly has not been requested */ 3906 if (compute_eigs) { 3907 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3908 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3909 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3910 lambda_max = eigs_r[neigs-1]; 3911 lambda_min = eigs_r[0]; 3912 if (pcbddc->use_coarse_estimates) { 3913 if (lambda_max>lambda_min) { 3914 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3915 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3916 } 3917 } 3918 } 3919 3920 /* check coarse problem residual error */ 3921 if (pcbddc->dbg_flag) { 3922 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3923 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3924 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3925 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3926 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3927 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3928 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3929 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3930 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3931 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3932 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3933 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3934 if (compute_eigs) { 3935 PetscReal lambda_max_s,lambda_min_s; 3936 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 3937 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3938 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3939 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3940 for (i=0;i<neigs;i++) { 3941 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3942 } 3943 } 3944 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3945 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3946 } 3947 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3948 if (compute_eigs) { 3949 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3950 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3951 } 3952 } 3953 } 3954 /* print additional info */ 3955 if (pcbddc->dbg_flag) { 3956 /* waits until all processes reaches this point */ 3957 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3958 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3960 } 3961 3962 /* free memory */ 3963 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3964 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3965 PetscFunctionReturn(0); 3966 } 3967 3968 #undef __FUNCT__ 3969 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3970 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3971 { 3972 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3973 PC_IS* pcis = (PC_IS*)pc->data; 3974 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3975 PetscInt i,coarse_size; 3976 PetscInt *local_primal_indices; 3977 PetscErrorCode ierr; 3978 3979 PetscFunctionBegin; 3980 /* Compute global number of coarse dofs */ 3981 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3982 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3983 } 3984 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3985 3986 /* check numbering */ 3987 if (pcbddc->dbg_flag) { 3988 PetscScalar coarsesum,*array; 3989 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 3990 3991 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3992 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3993 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3994 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3995 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3996 for (i=0;i<pcbddc->local_primal_size;i++) { 3997 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3998 } 3999 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4000 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4001 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4002 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4003 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4004 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4005 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4006 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4007 for (i=0;i<pcis->n;i++) { 4008 if (array[i] == 1.0) { 4009 set_error = PETSC_TRUE; 4010 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4011 } 4012 } 4013 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4014 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4015 for (i=0;i<pcis->n;i++) { 4016 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4017 } 4018 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4019 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4020 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4021 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4022 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4023 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4024 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4025 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4026 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4027 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4028 for (i=0;i<pcbddc->local_primal_size;i++) { 4029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4030 } 4031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4032 } 4033 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4034 if (set_error_reduced) { 4035 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4036 } 4037 } 4038 /* get back data */ 4039 *coarse_size_n = coarse_size; 4040 *local_primal_indices_n = local_primal_indices; 4041 PetscFunctionReturn(0); 4042 } 4043 4044 #undef __FUNCT__ 4045 #define __FUNCT__ "PCBDDCGlobalToLocal" 4046 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4047 { 4048 IS localis_t; 4049 PetscInt i,lsize,*idxs,n; 4050 PetscScalar *vals; 4051 PetscErrorCode ierr; 4052 4053 PetscFunctionBegin; 4054 /* get indices in local ordering exploiting local to global map */ 4055 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4056 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 4057 for (i=0;i<lsize;i++) vals[i] = 1.0; 4058 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4059 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4060 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4061 if (idxs) { /* multilevel guard */ 4062 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4063 } 4064 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4065 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4066 ierr = PetscFree(vals);CHKERRQ(ierr); 4067 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4068 /* now compute set in local ordering */ 4069 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4070 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4071 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4072 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4073 for (i=0,lsize=0;i<n;i++) { 4074 if (PetscRealPart(vals[i]) > 0.5) { 4075 lsize++; 4076 } 4077 } 4078 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 4079 for (i=0,lsize=0;i<n;i++) { 4080 if (PetscRealPart(vals[i]) > 0.5) { 4081 idxs[lsize++] = i; 4082 } 4083 } 4084 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4085 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4086 *localis = localis_t; 4087 PetscFunctionReturn(0); 4088 } 4089 4090 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4091 #undef __FUNCT__ 4092 #define __FUNCT__ "PCBDDCMatMult_Private" 4093 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4094 { 4095 PCBDDCChange_ctx change_ctx; 4096 PetscErrorCode ierr; 4097 4098 PetscFunctionBegin; 4099 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4100 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4101 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4102 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4103 PetscFunctionReturn(0); 4104 } 4105 4106 #undef __FUNCT__ 4107 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4108 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4109 { 4110 PCBDDCChange_ctx change_ctx; 4111 PetscErrorCode ierr; 4112 4113 PetscFunctionBegin; 4114 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4115 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4116 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4117 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4118 PetscFunctionReturn(0); 4119 } 4120