1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 23 24 /* Change global null space passed in by the user if change of basis has been requested */ 25 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 26 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 27 } 28 29 /* 30 Setup local correction and local part of coarse basis. 31 Gives back the dense local part of the coarse matrix in column major ordering 32 */ 33 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 34 35 /* Compute total number of coarse nodes and setup coarse solver */ 36 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 37 38 /* free */ 39 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 40 PetscFunctionReturn(0); 41 } 42 43 #undef __FUNCT__ 44 #define __FUNCT__ "PCBDDCResetCustomization" 45 PetscErrorCode PCBDDCResetCustomization(PC pc) 46 { 47 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 48 PetscInt i; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 57 for (i=0;i<pcbddc->n_ISForDofs;i++) { 58 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 59 } 60 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 61 pcbddc->n_ISForDofs = 0; 62 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 63 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 PetscFunctionReturn(0); 111 } 112 113 #undef __FUNCT__ 114 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 115 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 116 { 117 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 118 PC_IS *pcis = (PC_IS*)pc->data; 119 VecType impVecType; 120 PetscInt n_vertices,n_constraints,local_primal_size,n_R,old_size; 121 PetscErrorCode ierr; 122 123 PetscFunctionBegin; 124 if (!pcbddc->ConstraintMatrix) { 125 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 126 } 127 /* get sizes */ 128 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,NULL);CHKERRQ(ierr); 129 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 130 n_constraints = local_primal_size - n_vertices; 131 n_R = pcis->n-n_vertices; 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 /* local work vectors (try to avoid unneeded work)*/ 134 /* R nodes */ 135 old_size = -1; 136 if (pcbddc->vec1_R) { 137 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 138 } 139 if (n_R != old_size) { 140 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 141 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 145 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 146 } 147 /* local primal dofs */ 148 old_size = -1; 149 if (pcbddc->vec1_P) { 150 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 151 } 152 if (local_primal_size != old_size) { 153 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 156 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 157 } 158 /* local explicit constraints */ 159 old_size = -1; 160 if (pcbddc->vec1_C) { 161 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 162 } 163 if (n_constraints && n_constraints != old_size) { 164 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 167 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCBDDCSetUpCorrection" 174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 175 { 176 PetscErrorCode ierr; 177 /* pointers to pcis and pcbddc */ 178 PC_IS* pcis = (PC_IS*)pc->data; 179 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 180 /* submatrices of local problem */ 181 Mat A_RV,A_VR,A_VV; 182 /* working matrices */ 183 Mat M1,M2,M3,C_CR; 184 /* working vectors */ 185 Vec vec1_C,vec2_C,vec1_V,vec2_V; 186 /* additional working stuff */ 187 IS is_aux; 188 ISLocalToGlobalMapping BtoNmap; 189 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 190 const PetscScalar *array,*row_cmat_values; 191 const PetscInt *row_cmat_indices,*idx_R_local; 192 PetscInt *vertices,*idx_V_B,*auxindices; 193 PetscInt n_vertices,n_constraints,size_of_constraint; 194 PetscInt i,j,n_R,n_D,n_B; 195 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 196 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 197 MatType impMatType; 198 /* some shortcuts to scalars */ 199 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 200 /* for debugging purposes */ 201 PetscReal *coarsefunctions_errors,*constraints_errors; 202 203 PetscFunctionBegin; 204 /* get number of vertices and their local indices */ 205 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 206 n_constraints = pcbddc->local_primal_size-n_vertices; 207 /* Set Non-overlapping dimensions */ 208 n_B = pcis->n_B; n_D = pcis->n - n_B; 209 n_R = pcis->n-n_vertices; 210 211 /* Set types for local objects needed by BDDC precondtioner */ 212 impMatType = MATSEQDENSE; 213 214 /* Allocating some extra storage just to be safe */ 215 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 216 for (i=0;i<pcis->n;i++) auxindices[i]=i; 217 218 /* vertices in boundary numbering */ 219 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 220 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 221 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 222 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 223 if (i != n_vertices) { 224 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 225 } 226 227 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 228 if (n_constraints) { 229 /* see if we can save some allocations */ 230 if (pcbddc->local_auxmat2) { 231 PetscInt on_R,on_constraints; 232 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 233 if (on_R != n_R || on_constraints != n_constraints) { 234 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 235 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 236 } 237 } 238 /* work vectors */ 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 240 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 241 /* auxiliary matrices */ 242 if (!pcbddc->local_auxmat2) { 243 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 244 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 245 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 246 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 247 } 248 249 /* Extract constraints on R nodes: C_{CR} */ 250 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 251 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 252 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 253 254 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 255 for (i=0;i<n_constraints;i++) { 256 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 257 /* Get row of constraint matrix in R numbering */ 258 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 259 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 260 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 262 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 263 /* Solve for row of constraint matrix in R numbering */ 264 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 265 /* Set values in local_auxmat2 */ 266 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 267 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 268 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 } 270 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 272 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 273 274 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 275 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 276 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 277 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 278 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 279 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 280 ierr = MatSetUp(M1);CHKERRQ(ierr); 281 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 282 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 283 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 284 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 285 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 286 ierr = MatDestroy(&M2);CHKERRQ(ierr); 287 ierr = MatDestroy(&M3);CHKERRQ(ierr); 288 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 289 if (!pcbddc->local_auxmat1) { 290 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 291 } else { 292 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } 294 } 295 296 /* Get submatrices from subdomain matrix */ 297 if (n_vertices) { 298 PetscInt ibs,mbs; 299 PetscBool issbaij; 300 Mat newmat; 301 302 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 303 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 304 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 305 if (ibs != mbs) { /* need to convert to SEQAIJ */ 306 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 310 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 311 } else { 312 /* this is safe */ 313 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 314 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 315 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 316 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 317 /* which of the two approaches is faster? */ 318 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 319 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 320 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 322 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 323 } else { 324 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 325 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 326 } 327 } 328 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 329 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 330 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 331 } 332 333 /* Matrix of coarse basis functions (local) */ 334 if (pcbddc->coarse_phi_B) { 335 PetscInt on_B,on_primal; 336 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 337 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 338 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 339 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 340 } 341 } 342 if (pcbddc->coarse_phi_D) { 343 PetscInt on_D,on_primal; 344 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 345 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 346 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 347 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 348 } 349 } 350 if (!pcbddc->coarse_phi_B) { 351 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 352 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 353 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 354 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 355 } 356 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 357 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 358 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 359 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 360 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 361 } 362 363 if (pcbddc->dbg_flag) { 364 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 365 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 366 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 367 } 368 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 369 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 370 371 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 372 373 /* vertices */ 374 for (i=0;i<n_vertices;i++) { 375 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 376 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 377 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 378 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 379 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 380 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 381 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 382 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 384 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 385 if (n_constraints) { 386 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 387 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 388 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 389 } 390 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 391 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 392 393 /* Set values in coarse basis function and subdomain part of coarse_mat */ 394 /* coarse basis functions */ 395 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 396 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 398 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 400 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 402 if (pcbddc->switch_static || pcbddc->dbg_flag) { 403 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 405 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 406 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 407 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 } 409 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 410 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 if (n_constraints) { 414 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 } 418 419 /* check */ 420 if (pcbddc->dbg_flag) { 421 /* assemble subdomain vector on local nodes */ 422 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 423 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 427 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 428 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 429 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 430 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 431 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 434 if (n_constraints) { 435 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 437 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 438 } 439 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 441 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 442 /* check saddle point solution */ 443 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 444 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 445 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 446 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 447 /* shift by the identity matrix */ 448 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 449 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 452 } 453 } 454 455 /* constraints */ 456 for (i=0;i<n_constraints;i++) { 457 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 458 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 459 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 460 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 461 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 462 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 463 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 464 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 465 if (n_vertices) { 466 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 467 } 468 /* Set values in coarse basis function and subdomain part of coarse_mat */ 469 /* coarse basis functions */ 470 j = i+n_vertices; /* don't touch this! */ 471 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 472 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 474 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 475 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 476 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 477 if (pcbddc->switch_static || pcbddc->dbg_flag) { 478 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 481 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 482 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 483 } 484 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 485 if (n_vertices) { 486 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 487 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 488 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 489 } 490 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 491 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 492 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 493 494 if (pcbddc->dbg_flag) { 495 /* assemble subdomain vector on nodes */ 496 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 497 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 498 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 499 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 500 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 501 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 502 /* assemble subdomain vector of lagrange multipliers */ 503 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 504 if (n_vertices) { 505 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 506 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 507 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 508 } 509 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 510 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 511 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 512 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 513 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 514 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 515 /* check saddle point solution */ 516 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 517 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 518 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 519 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 520 /* shift by the identity matrix */ 521 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 522 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 523 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 524 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 525 } 526 } 527 /* call assembling routines for local coarse basis */ 528 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 if (pcbddc->switch_static || pcbddc->dbg_flag) { 531 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 533 } 534 535 /* compute other basis functions for non-symmetric problems */ 536 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 537 if (!setsym || (setsym && !issym)) { 538 if (!pcbddc->coarse_psi_B) { 539 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 540 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 541 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 542 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 543 } 544 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 545 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 546 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 547 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 548 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 549 } 550 for (i=0;i<pcbddc->local_primal_size;i++) { 551 if (n_constraints) { 552 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 553 for (j=0;j<n_constraints;j++) { 554 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 555 } 556 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 557 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 558 } 559 if (i<n_vertices) { 560 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 561 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 562 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 563 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 564 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 565 if (n_constraints) { 566 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 567 } 568 } else { 569 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 570 } 571 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 572 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 573 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 574 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 575 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 576 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 577 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 578 if (i<n_vertices) { 579 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 580 } 581 if (pcbddc->switch_static || pcbddc->dbg_flag) { 582 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 583 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 584 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 585 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 586 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 587 } 588 589 if (pcbddc->dbg_flag) { 590 /* assemble subdomain vector on nodes */ 591 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 592 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 593 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 594 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 595 if (i<n_vertices) { 596 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 597 } 598 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 599 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 600 /* assemble subdomain vector of lagrange multipliers */ 601 for (j=0;j<pcbddc->local_primal_size;j++) { 602 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 603 } 604 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 605 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 606 /* check saddle point solution */ 607 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 608 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 609 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 610 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 611 /* shift by the identity matrix */ 612 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 613 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 614 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 615 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 616 } 617 } 618 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 if (pcbddc->switch_static || pcbddc->dbg_flag) { 621 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 623 } 624 } 625 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 626 /* Checking coarse_sub_mat and coarse basis functios */ 627 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 628 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 629 if (pcbddc->dbg_flag) { 630 Mat coarse_sub_mat; 631 Mat AUXMAT,TM1,TM2,TM3,TM4; 632 Mat coarse_phi_D,coarse_phi_B; 633 Mat coarse_psi_D,coarse_psi_B; 634 Mat A_II,A_BB,A_IB,A_BI; 635 MatType checkmattype=MATSEQAIJ; 636 PetscReal real_value; 637 638 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 639 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 640 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 641 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 642 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 643 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 644 if (pcbddc->coarse_psi_B) { 645 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 646 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 647 } 648 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 649 650 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 651 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 652 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 653 if (pcbddc->coarse_psi_B) { 654 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 655 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 656 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 657 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 658 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 659 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 660 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 661 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 662 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 663 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 664 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 665 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 666 } else { 667 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 668 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 669 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 670 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 672 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 673 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 674 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 675 } 676 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 677 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 678 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 679 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 680 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 681 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 685 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 686 for (i=0;i<pcbddc->local_primal_size;i++) { 687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 688 } 689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 690 for (i=0;i<pcbddc->local_primal_size;i++) { 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 692 } 693 if (pcbddc->coarse_psi_B) { 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 695 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 697 } 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 699 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 701 } 702 } 703 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 704 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 705 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 706 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 707 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 708 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 709 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 710 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 711 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 712 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 713 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 714 if (pcbddc->coarse_psi_B) { 715 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 716 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 717 } 718 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 719 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 720 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 721 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 722 } 723 /* free memory */ 724 if (n_vertices) { 725 ierr = PetscFree(vertices);CHKERRQ(ierr); 726 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 727 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 730 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 731 } 732 if (n_constraints) { 733 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 734 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 735 ierr = MatDestroy(&M1);CHKERRQ(ierr); 736 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 737 } 738 ierr = PetscFree(auxindices);CHKERRQ(ierr); 739 /* get back data */ 740 *coarse_submat_vals_n = coarse_submat_vals; 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 746 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 747 { 748 PC_IS* pcis = (PC_IS*)(pc->data); 749 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 750 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 751 PetscBool issbaij,isbaij; 752 /* manage repeated solves */ 753 MatReuse reuse; 754 MatStructure matstruct; 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 759 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 760 } 761 /* get mat flags */ 762 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 763 reuse = MAT_INITIAL_MATRIX; 764 if (pc->setupcalled) { 765 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 766 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 767 if (matstruct == SAME_NONZERO_PATTERN) { 768 reuse = MAT_REUSE_MATRIX; 769 } else { 770 reuse = MAT_INITIAL_MATRIX; 771 } 772 } 773 if (reuse == MAT_INITIAL_MATRIX) { 774 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 775 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 776 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 777 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 778 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 779 } 780 781 /* transform local matrices if needed */ 782 if (pcbddc->use_change_of_basis) { 783 Mat change_mat_all; 784 PetscScalar *row_cmat_values; 785 PetscInt *row_cmat_indices; 786 PetscInt *nnz,*is_indices,*temp_indices; 787 PetscInt i,j,k,n_D,n_B; 788 789 /* Get Non-overlapping dimensions */ 790 n_B = pcis->n_B; 791 n_D = pcis->n-n_B; 792 793 /* compute nonzero structure of change of basis on all local nodes */ 794 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 795 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 796 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 797 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 798 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 799 k=1; 800 for (i=0;i<n_B;i++) { 801 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 802 nnz[is_indices[i]]=j; 803 if (k < j) k = j; 804 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 805 } 806 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 807 /* assemble change of basis matrix on the whole set of local dofs */ 808 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 809 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 810 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 811 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 812 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 813 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 814 for (i=0;i<n_D;i++) { 815 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 816 } 817 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 818 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 819 for (i=0;i<n_B;i++) { 820 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 821 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 822 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 823 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 824 } 825 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 826 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 827 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 828 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 829 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 830 if (!issbaij && !isbaij) { 831 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 832 } else { 833 Mat work_mat; 834 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 835 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 836 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 837 } 838 /* 839 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 840 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 841 */ 842 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 843 ierr = PetscFree(nnz);CHKERRQ(ierr); 844 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 845 } else { 846 /* without change of basis, the local matrix is unchanged */ 847 if (!pcbddc->local_mat) { 848 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 849 pcbddc->local_mat = matis->A; 850 } 851 } 852 853 /* get submatrices */ 854 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 855 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 856 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 857 if (!issbaij) { 858 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 859 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 860 } else { 861 Mat newmat; 862 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 863 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 864 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 865 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 866 } 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 872 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 873 { 874 PC_IS* pcis = (PC_IS*)(pc->data); 875 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 876 IS is_aux1,is_aux2; 877 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 878 PetscInt n_vertices,i,j,n_R,n_D,n_B; 879 PetscInt vbs,bs; 880 PetscBT bitmask; 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 /* No need to setup local scatters if primal space is unchanged */ 885 if (!pcbddc->new_primal_space) { 886 PetscFunctionReturn(0); 887 } 888 /* destroy old objects */ 889 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 890 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 891 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 892 /* Set Non-overlapping dimensions */ 893 n_B = pcis->n_B; n_D = pcis->n - n_B; 894 /* get vertex indices from constraint matrix */ 895 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 896 897 /* create auxiliary bitmask */ 898 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 899 for (i=0;i<n_vertices;i++) { 900 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 901 } 902 903 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 904 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 905 for (i=0, n_R=0; i<pcis->n; i++) { 906 if (!PetscBTLookup(bitmask,i)) { 907 idx_R_local[n_R] = i; 908 n_R++; 909 } 910 } 911 912 /* Block code */ 913 vbs = 1; 914 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 915 if (bs>1 && !(n_vertices%bs)) { 916 PetscBool is_blocked = PETSC_TRUE; 917 PetscInt *vary; 918 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 919 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 920 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 921 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 922 for (i=0; i<n_vertices; i++) { 923 if (vary[i]!=0 && vary[i]!=bs) { 924 is_blocked = PETSC_FALSE; 925 break; 926 } 927 } 928 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 929 vbs = bs; 930 for (i=0;i<n_R/vbs;i++) { 931 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 932 } 933 } 934 ierr = PetscFree(vary);CHKERRQ(ierr); 935 } 936 ierr = PetscFree(vertices);CHKERRQ(ierr); 937 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 938 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 939 940 /* print some info if requested */ 941 if (pcbddc->dbg_flag) { 942 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 944 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 948 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 949 } 950 951 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 952 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 953 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 954 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 955 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 956 for (i=0; i<n_D; i++) { 957 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 958 } 959 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 960 for (i=0, j=0; i<n_R; i++) { 961 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 962 aux_array1[j++] = i; 963 } 964 } 965 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 966 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 967 for (i=0, j=0; i<n_B; i++) { 968 if (!PetscBTLookup(bitmask,is_indices[i])) { 969 aux_array2[j++] = i; 970 } 971 } 972 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 973 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 974 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 975 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 976 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 977 978 if (pcbddc->switch_static || pcbddc->dbg_flag) { 979 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 980 for (i=0, j=0; i<n_R; i++) { 981 if (PetscBTLookup(bitmask,idx_R_local[i])) { 982 aux_array1[j++] = i; 983 } 984 } 985 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 986 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 987 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 988 } 989 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 990 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 995 #undef __FUNCT__ 996 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 997 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 998 { 999 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1000 PC_IS *pcis = (PC_IS*)pc->data; 1001 PC pc_temp; 1002 Mat A_RR; 1003 Vec vec1,vec2,vec3; 1004 MatStructure matstruct; 1005 MatReuse reuse; 1006 PetscScalar m_one = -1.0; 1007 PetscReal value; 1008 PetscInt n_D,n_R,ibs,mbs; 1009 PetscBool use_exact,use_exact_reduced,issbaij; 1010 PetscErrorCode ierr; 1011 /* prefixes stuff */ 1012 char dir_prefix[256],neu_prefix[256],str_level[3]; 1013 size_t len; 1014 1015 PetscFunctionBegin; 1016 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1017 1018 /* compute prefixes */ 1019 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1020 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1021 if (!pcbddc->current_level) { 1022 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1023 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1024 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1025 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1026 } else { 1027 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1028 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1029 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1030 len -= 15; /* remove "pc_bddc_coarse_" */ 1031 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1032 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1033 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1034 *(dir_prefix+len)='\0'; 1035 *(neu_prefix+len)='\0'; 1036 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1037 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1038 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1039 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1040 } 1041 1042 /* DIRICHLET PROBLEM */ 1043 /* Matrix for Dirichlet problem is pcis->A_II */ 1044 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1045 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1046 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1047 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1048 /* default */ 1049 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1050 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1051 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1052 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1053 if (issbaij) { 1054 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1055 } else { 1056 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1057 } 1058 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1059 } 1060 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1061 /* Allow user's customization */ 1062 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1063 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1064 if (!n_D) { 1065 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1066 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1067 } 1068 /* Set Up KSP for Dirichlet problem of BDDC */ 1069 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1070 /* set ksp_D into pcis data */ 1071 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1072 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1073 pcis->ksp_D = pcbddc->ksp_D; 1074 1075 /* NEUMANN PROBLEM */ 1076 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1077 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1078 if (pcbddc->ksp_R) { /* already created ksp */ 1079 PetscInt nn_R; 1080 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1081 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1082 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1083 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1084 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1085 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1086 reuse = MAT_INITIAL_MATRIX; 1087 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1088 if (pcbddc->new_primal_space) { /* we are not sure the matrix will have the same nonzero pattern */ 1089 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1090 reuse = MAT_INITIAL_MATRIX; 1091 } else { /* safe to reuse the matrix */ 1092 reuse = MAT_REUSE_MATRIX; 1093 } 1094 } 1095 /* last check */ 1096 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1097 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1098 reuse = MAT_INITIAL_MATRIX; 1099 } 1100 } else { /* first time, so we need to create the matrix */ 1101 reuse = MAT_INITIAL_MATRIX; 1102 } 1103 /* extract A_RR */ 1104 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1105 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1106 if (ibs != mbs) { 1107 Mat newmat; 1108 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1109 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1110 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1111 } else { 1112 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1113 } 1114 if (!pcbddc->ksp_R) { /* create object if not present */ 1115 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1116 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1117 /* default */ 1118 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1119 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1120 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1121 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1122 if (issbaij) { 1123 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1124 } else { 1125 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1126 } 1127 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1128 } 1129 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1130 /* Allow user's customization */ 1131 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1132 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1133 if (!n_R) { 1134 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1135 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1136 } 1137 /* Set Up KSP for Neumann problem of BDDC */ 1138 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1139 1140 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1141 1142 /* Dirichlet */ 1143 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1144 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1145 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1146 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1147 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1148 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1149 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1150 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1151 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1152 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1153 /* need to be adapted? */ 1154 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1155 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1156 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1157 /* print info */ 1158 if (pcbddc->dbg_flag) { 1159 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1162 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1163 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1164 } 1165 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1166 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1167 } 1168 1169 /* Neumann */ 1170 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1171 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1172 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1173 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1174 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1175 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1176 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1177 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1178 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1179 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1180 /* need to be adapted? */ 1181 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1182 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1183 /* print info */ 1184 if (pcbddc->dbg_flag) { 1185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1186 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1187 } 1188 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1189 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1190 } 1191 /* free Neumann problem's matrix */ 1192 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1198 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1199 { 1200 PetscErrorCode ierr; 1201 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1202 1203 PetscFunctionBegin; 1204 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1205 if (pcbddc->local_auxmat1) { 1206 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1207 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1214 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1215 { 1216 PetscErrorCode ierr; 1217 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1218 PC_IS* pcis = (PC_IS*) (pc->data); 1219 const PetscScalar zero = 0.0; 1220 1221 PetscFunctionBegin; 1222 /* Application of PHI^T (or PSI^T) */ 1223 if (pcbddc->coarse_psi_B) { 1224 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1225 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1226 } else { 1227 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1228 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1229 } 1230 /* Scatter data of coarse_rhs */ 1231 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1232 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 1234 /* Local solution on R nodes */ 1235 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1236 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1237 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1238 if (pcbddc->switch_static) { 1239 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1240 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1241 } 1242 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1243 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1244 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 if (pcbddc->switch_static) { 1247 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1248 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1249 } 1250 1251 /* Coarse solution */ 1252 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1254 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1255 } 1256 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1257 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 1259 /* Sum contributions from two levels */ 1260 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1261 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1267 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1268 { 1269 PetscErrorCode ierr; 1270 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1271 1272 PetscFunctionBegin; 1273 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1279 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1280 { 1281 PetscErrorCode ierr; 1282 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1283 1284 PetscFunctionBegin; 1285 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 /* uncomment for testing purposes */ 1290 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1293 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1294 { 1295 PetscErrorCode ierr; 1296 PC_IS* pcis = (PC_IS*)(pc->data); 1297 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1298 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1299 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1300 MatType impMatType=MATSEQAIJ; 1301 /* one and zero */ 1302 PetscScalar one=1.0,zero=0.0; 1303 /* space to store constraints and their local indices */ 1304 PetscScalar *temp_quadrature_constraint; 1305 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1306 /* iterators */ 1307 PetscInt i,j,k,total_counts,temp_start_ptr; 1308 /* stuff to store connected components stored in pcbddc->mat_graph */ 1309 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1310 PetscInt n_ISForFaces,n_ISForEdges; 1311 /* near null space stuff */ 1312 MatNullSpace nearnullsp; 1313 const Vec *nearnullvecs; 1314 Vec *localnearnullsp; 1315 PetscBool nnsp_has_cnst; 1316 PetscInt nnsp_size; 1317 PetscScalar *array; 1318 /* BLAS integers */ 1319 PetscBLASInt lwork,lierr; 1320 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1321 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1322 /* LAPACK working arrays for SVD or POD */ 1323 PetscBool skip_lapack; 1324 PetscScalar *work; 1325 PetscReal *singular_vals; 1326 #if defined(PETSC_USE_COMPLEX) 1327 PetscReal *rwork; 1328 #endif 1329 #if defined(PETSC_MISSING_LAPACK_GESVD) 1330 PetscBLASInt Blas_one_2=1; 1331 PetscScalar *temp_basis,*correlation_mat; 1332 #else 1333 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1334 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1335 #endif 1336 /* change of basis */ 1337 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1338 PetscBool boolforchange,qr_needed; 1339 PetscBT touched,change_basis,qr_needed_idx; 1340 /* auxiliary stuff */ 1341 PetscInt *nnz,*is_indices,*local_to_B; 1342 /* some quantities */ 1343 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1344 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1345 1346 1347 PetscFunctionBegin; 1348 /* Destroy Mat objects computed previously */ 1349 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1350 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1351 /* Get index sets for faces, edges and vertices from graph */ 1352 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1353 pcbddc->use_vertices = PETSC_TRUE; 1354 } 1355 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1356 /* HACK: provide functions to set change of basis */ 1357 if (!ISForVertices && pcbddc->NullSpace) { 1358 pcbddc->use_change_of_basis = PETSC_TRUE; 1359 pcbddc->use_change_on_faces = PETSC_FALSE; 1360 } 1361 /* print some info */ 1362 if (pcbddc->dbg_flag) { 1363 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1364 i = 0; 1365 if (ISForVertices) { 1366 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1367 } 1368 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1369 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1370 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1371 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1372 } 1373 /* check if near null space is attached to global mat */ 1374 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1375 if (nearnullsp) { 1376 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1377 /* remove any stored info */ 1378 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1379 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1380 /* store information for BDDC solver reuse */ 1381 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1382 pcbddc->onearnullspace = nearnullsp; 1383 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1384 for (i=0;i<nnsp_size;i++) { 1385 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1386 } 1387 } else { /* if near null space is not provided BDDC uses constants by default */ 1388 nnsp_size = 0; 1389 nnsp_has_cnst = PETSC_TRUE; 1390 } 1391 /* get max number of constraints on a single cc */ 1392 max_constraints = nnsp_size; 1393 if (nnsp_has_cnst) max_constraints++; 1394 1395 /* 1396 Evaluate maximum storage size needed by the procedure 1397 - temp_indices will contain start index of each constraint stored as follows 1398 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1399 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1400 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1401 */ 1402 total_counts = n_ISForFaces+n_ISForEdges; 1403 total_counts *= max_constraints; 1404 n_vertices = 0; 1405 if (ISForVertices) { 1406 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1407 } 1408 total_counts += n_vertices; 1409 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1410 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1411 total_counts = 0; 1412 max_size_of_constraint = 0; 1413 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1414 if (i<n_ISForEdges) { 1415 used_IS = &ISForEdges[i]; 1416 } else { 1417 used_IS = &ISForFaces[i-n_ISForEdges]; 1418 } 1419 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1420 total_counts += j; 1421 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1422 } 1423 total_counts *= max_constraints; 1424 total_counts += n_vertices; 1425 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1426 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1427 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1428 /* local to boundary numbering */ 1429 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1430 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1431 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1432 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1433 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1434 /* get local part of global near null space vectors */ 1435 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1436 for (k=0;k<nnsp_size;k++) { 1437 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1438 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1439 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1440 } 1441 1442 /* whether or not to skip lapack calls */ 1443 skip_lapack = PETSC_TRUE; 1444 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1445 1446 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1447 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1448 PetscScalar temp_work; 1449 #if defined(PETSC_MISSING_LAPACK_GESVD) 1450 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1451 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1452 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1453 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1454 #if defined(PETSC_USE_COMPLEX) 1455 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1456 #endif 1457 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1458 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1459 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1460 lwork = -1; 1461 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1462 #if !defined(PETSC_USE_COMPLEX) 1463 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1464 #else 1465 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1466 #endif 1467 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1468 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1469 #else /* on missing GESVD */ 1470 /* SVD */ 1471 PetscInt max_n,min_n; 1472 max_n = max_size_of_constraint; 1473 min_n = max_constraints; 1474 if (max_size_of_constraint < max_constraints) { 1475 min_n = max_size_of_constraint; 1476 max_n = max_constraints; 1477 } 1478 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1479 #if defined(PETSC_USE_COMPLEX) 1480 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1481 #endif 1482 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1483 lwork = -1; 1484 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1485 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1486 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1487 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1488 #if !defined(PETSC_USE_COMPLEX) 1489 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1490 #else 1491 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1492 #endif 1493 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1494 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1495 #endif /* on missing GESVD */ 1496 /* Allocate optimal workspace */ 1497 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1498 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1499 } 1500 /* Now we can loop on constraining sets */ 1501 total_counts = 0; 1502 temp_indices[0] = 0; 1503 /* vertices */ 1504 if (ISForVertices) { 1505 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1506 if (nnsp_has_cnst) { /* consider all vertices */ 1507 for (i=0;i<n_vertices;i++) { 1508 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1509 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1510 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1511 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1512 total_counts++; 1513 } 1514 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1515 PetscBool used_vertex; 1516 for (i=0;i<n_vertices;i++) { 1517 used_vertex = PETSC_FALSE; 1518 k = 0; 1519 while (!used_vertex && k<nnsp_size) { 1520 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1521 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1522 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1523 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1524 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1525 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1526 total_counts++; 1527 used_vertex = PETSC_TRUE; 1528 } 1529 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1530 k++; 1531 } 1532 } 1533 } 1534 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1535 n_vertices = total_counts; 1536 } 1537 1538 /* edges and faces */ 1539 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1540 if (i<n_ISForEdges) { 1541 used_IS = &ISForEdges[i]; 1542 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1543 } else { 1544 used_IS = &ISForFaces[i-n_ISForEdges]; 1545 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1546 } 1547 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1548 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1549 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1550 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1551 /* change of basis should not be performed on local periodic nodes */ 1552 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1553 if (nnsp_has_cnst) { 1554 PetscScalar quad_value; 1555 temp_constraints++; 1556 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1557 for (j=0;j<size_of_constraint;j++) { 1558 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1559 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1560 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1561 } 1562 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1563 total_counts++; 1564 } 1565 for (k=0;k<nnsp_size;k++) { 1566 PetscReal real_value; 1567 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1568 for (j=0;j<size_of_constraint;j++) { 1569 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1570 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1571 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1572 } 1573 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1574 /* check if array is null on the connected component */ 1575 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1576 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1577 if (real_value > 0.0) { /* keep indices and values */ 1578 temp_constraints++; 1579 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1580 total_counts++; 1581 } 1582 } 1583 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1584 valid_constraints = temp_constraints; 1585 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1586 if (!pcbddc->use_nnsp_true && temp_constraints) { 1587 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1588 1589 #if defined(PETSC_MISSING_LAPACK_GESVD) 1590 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1591 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1592 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1593 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1594 from that computed using LAPACKgesvd 1595 -> This is due to a different computation of eigenvectors in LAPACKheev 1596 -> The quality of the POD-computed basis will be the same */ 1597 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1598 /* Store upper triangular part of correlation matrix */ 1599 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1600 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1601 for (j=0;j<temp_constraints;j++) { 1602 for (k=0;k<j+1;k++) { 1603 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1604 } 1605 } 1606 /* compute eigenvalues and eigenvectors of correlation matrix */ 1607 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1608 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1609 #if !defined(PETSC_USE_COMPLEX) 1610 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1611 #else 1612 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1613 #endif 1614 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1615 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1616 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1617 j=0; 1618 while (j < temp_constraints && singular_vals[j] < tol) j++; 1619 total_counts=total_counts-j; 1620 valid_constraints = temp_constraints-j; 1621 /* scale and copy POD basis into used quadrature memory */ 1622 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1623 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1624 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1625 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1626 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1627 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1628 if (j<temp_constraints) { 1629 PetscInt ii; 1630 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1631 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1632 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1633 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1634 for (k=0;k<temp_constraints-j;k++) { 1635 for (ii=0;ii<size_of_constraint;ii++) { 1636 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1637 } 1638 } 1639 } 1640 #else /* on missing GESVD */ 1641 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1642 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1643 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1644 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1645 #if !defined(PETSC_USE_COMPLEX) 1646 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1647 #else 1648 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1649 #endif 1650 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1651 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1652 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1653 k = temp_constraints; 1654 if (k > size_of_constraint) k = size_of_constraint; 1655 j = 0; 1656 while (j < k && singular_vals[k-j-1] < tol) j++; 1657 total_counts = total_counts-temp_constraints+k-j; 1658 valid_constraints = k-j; 1659 #endif /* on missing GESVD */ 1660 } 1661 /* setting change_of_basis flag is safe now */ 1662 if (boolforchange) { 1663 for (j=0;j<valid_constraints;j++) { 1664 PetscBTSet(change_basis,total_counts-j-1); 1665 } 1666 } 1667 } 1668 /* free index sets of faces, edges and vertices */ 1669 for (i=0;i<n_ISForFaces;i++) { 1670 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1671 } 1672 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1673 for (i=0;i<n_ISForEdges;i++) { 1674 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1675 } 1676 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1677 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1678 1679 /* free workspace */ 1680 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1681 ierr = PetscFree(work);CHKERRQ(ierr); 1682 #if defined(PETSC_USE_COMPLEX) 1683 ierr = PetscFree(rwork);CHKERRQ(ierr); 1684 #endif 1685 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1686 #if defined(PETSC_MISSING_LAPACK_GESVD) 1687 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1688 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1689 #endif 1690 } 1691 for (k=0;k<nnsp_size;k++) { 1692 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1693 } 1694 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1695 1696 /* set quantities in pcbddc data structure */ 1697 /* n_vertices defines the number of subdomain corners in the primal space */ 1698 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1699 pcbddc->local_primal_size = total_counts; 1700 pcbddc->n_vertices = n_vertices; 1701 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1702 1703 /* Create constraint matrix */ 1704 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1705 /* so we need to set it up properly either with or without change of basis */ 1706 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1707 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1708 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1709 /* array to compute a local numbering of constraints : vertices first then constraints */ 1710 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1711 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1712 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1713 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1714 /* auxiliary stuff for basis change */ 1715 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1716 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1717 1718 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1719 total_primal_vertices=0; 1720 for (i=0;i<pcbddc->local_primal_size;i++) { 1721 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1722 if (size_of_constraint == 1) { 1723 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1724 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1725 aux_primal_minloc[total_primal_vertices]=0; 1726 total_primal_vertices++; 1727 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1728 PetscInt min_loc,min_index; 1729 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1730 /* find first untouched local node */ 1731 k = 0; 1732 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1733 min_index = global_indices[k]; 1734 min_loc = k; 1735 /* search the minimum among global nodes already untouched on the cc */ 1736 for (k=1;k<size_of_constraint;k++) { 1737 /* there can be more than one constraint on a single connected component */ 1738 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1739 min_index = global_indices[k]; 1740 min_loc = k; 1741 } 1742 } 1743 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1744 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1745 aux_primal_minloc[total_primal_vertices]=min_loc; 1746 total_primal_vertices++; 1747 } 1748 } 1749 /* determine if a QR strategy is needed for change of basis */ 1750 qr_needed = PETSC_FALSE; 1751 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1752 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1753 if (PetscBTLookup(change_basis,i)) { 1754 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1755 j = 0; 1756 for (k=0;k<size_of_constraint;k++) { 1757 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1758 j++; 1759 } 1760 } 1761 /* found more than one primal dof on the cc */ 1762 if (j > 1) { 1763 PetscBTSet(qr_needed_idx,i); 1764 qr_needed = PETSC_TRUE; 1765 } 1766 } 1767 } 1768 /* free workspace */ 1769 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1770 1771 /* permute indices in order to have a sorted set of vertices */ 1772 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1773 1774 /* nonzero structure of constraint matrix */ 1775 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1776 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1777 j=total_primal_vertices; 1778 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1779 if (!PetscBTLookup(change_basis,i)) { 1780 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1781 j++; 1782 } 1783 } 1784 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1785 ierr = PetscFree(nnz);CHKERRQ(ierr); 1786 /* set values in constraint matrix */ 1787 for (i=0;i<total_primal_vertices;i++) { 1788 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1789 } 1790 total_counts = total_primal_vertices; 1791 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1792 if (!PetscBTLookup(change_basis,i)) { 1793 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1794 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1795 total_counts++; 1796 } 1797 } 1798 /* assembling */ 1799 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1800 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1801 /* 1802 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1803 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1804 */ 1805 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1806 if (pcbddc->use_change_of_basis) { 1807 /* dual and primal dofs on a single cc */ 1808 PetscInt dual_dofs,primal_dofs; 1809 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1810 PetscInt primal_counter; 1811 /* working stuff for GEQRF */ 1812 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1813 PetscBLASInt lqr_work; 1814 /* working stuff for UNGQR */ 1815 PetscScalar *gqr_work,lgqr_work_t; 1816 PetscBLASInt lgqr_work; 1817 /* working stuff for TRTRS */ 1818 PetscScalar *trs_rhs; 1819 PetscBLASInt Blas_NRHS; 1820 /* pointers for values insertion into change of basis matrix */ 1821 PetscInt *start_rows,*start_cols; 1822 PetscScalar *start_vals; 1823 /* working stuff for values insertion */ 1824 PetscBT is_primal; 1825 1826 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1827 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1828 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1829 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1830 /* work arrays */ 1831 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1832 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1833 /* nonzeros per row */ 1834 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1835 if (PetscBTLookup(change_basis,i)) { 1836 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1837 if (PetscBTLookup(qr_needed_idx,i)) { 1838 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1839 } else { 1840 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1841 /* get local primal index on the cc */ 1842 j = 0; 1843 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1844 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1845 } 1846 } 1847 } 1848 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1849 ierr = PetscFree(nnz);CHKERRQ(ierr); 1850 /* Set initial identity in the matrix */ 1851 for (i=0;i<pcis->n_B;i++) { 1852 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1853 } 1854 1855 if (pcbddc->dbg_flag) { 1856 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1857 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1858 } 1859 1860 1861 /* Now we loop on the constraints which need a change of basis */ 1862 /* 1863 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1864 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1865 1866 Basic blocks of change of basis matrix T computed 1867 1868 - Using the following block transformation if there is only a primal dof on the cc 1869 (in the example, primal dof is the last one of the edge in LOCAL ordering 1870 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1871 | 1 0 ... 0 1 | 1872 | 0 1 ... 0 1 | 1873 | ... | 1874 | 0 ... 1 1 | 1875 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1876 1877 - via QR decomposition of constraints otherwise 1878 */ 1879 if (qr_needed) { 1880 /* space to store Q */ 1881 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1882 /* first we issue queries for optimal work */ 1883 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1884 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1885 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1886 lqr_work = -1; 1887 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1888 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1889 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1890 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1891 lgqr_work = -1; 1892 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1893 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1894 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1895 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1896 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1897 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1898 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1899 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1900 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1901 /* array to store scaling factors for reflectors */ 1902 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1903 /* array to store rhs and solution of triangular solver */ 1904 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1905 /* allocating workspace for check */ 1906 if (pcbddc->dbg_flag) { 1907 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1908 } 1909 } 1910 /* array to store whether a node is primal or not */ 1911 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1912 for (i=0;i<total_primal_vertices;i++) { 1913 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1914 } 1915 1916 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1917 /* -> using implicit ordering contained in temp_indices data */ 1918 total_counts = pcbddc->n_vertices; 1919 primal_counter = total_counts; 1920 while (total_counts<pcbddc->local_primal_size) { 1921 primal_dofs = 1; 1922 if (PetscBTLookup(change_basis,total_counts)) { 1923 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1924 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1925 primal_dofs++; 1926 } 1927 /* get constraint info */ 1928 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1929 dual_dofs = size_of_constraint-primal_dofs; 1930 1931 if (pcbddc->dbg_flag) { 1932 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1933 } 1934 1935 if (primal_dofs > 1) { /* QR */ 1936 1937 /* copy quadrature constraints for change of basis check */ 1938 if (pcbddc->dbg_flag) { 1939 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1940 } 1941 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1942 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1943 1944 /* compute QR decomposition of constraints */ 1945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1946 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1947 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1948 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1949 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1950 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1951 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1952 1953 /* explictly compute R^-T */ 1954 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1955 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1956 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1957 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1958 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1959 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1960 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1961 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1962 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1963 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1964 1965 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1966 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1968 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1969 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1970 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1971 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1972 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1973 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1974 1975 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1976 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1977 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1978 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1979 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1980 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1981 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1982 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1983 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1984 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1985 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1986 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1987 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1988 1989 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1990 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1991 /* insert cols for primal dofs */ 1992 for (j=0;j<primal_dofs;j++) { 1993 start_vals = &qr_basis[j*size_of_constraint]; 1994 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1995 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1996 } 1997 /* insert cols for dual dofs */ 1998 for (j=0,k=0;j<dual_dofs;k++) { 1999 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2000 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2001 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2002 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2003 j++; 2004 } 2005 } 2006 2007 /* check change of basis */ 2008 if (pcbddc->dbg_flag) { 2009 PetscInt ii,jj; 2010 PetscBool valid_qr=PETSC_TRUE; 2011 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2012 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2013 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2014 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2015 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2016 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2017 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2018 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2019 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2020 for (jj=0;jj<size_of_constraint;jj++) { 2021 for (ii=0;ii<primal_dofs;ii++) { 2022 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2023 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2024 } 2025 } 2026 if (!valid_qr) { 2027 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2028 for (jj=0;jj<size_of_constraint;jj++) { 2029 for (ii=0;ii<primal_dofs;ii++) { 2030 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2031 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2032 } 2033 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2034 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2035 } 2036 } 2037 } 2038 } else { 2039 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2040 } 2041 } 2042 } else { /* simple transformation block */ 2043 PetscInt row,col; 2044 PetscScalar val; 2045 for (j=0;j<size_of_constraint;j++) { 2046 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2047 if (!PetscBTLookup(is_primal,row)) { 2048 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2049 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2050 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2051 } else { 2052 for (k=0;k<size_of_constraint;k++) { 2053 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2054 if (row != col) { 2055 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2056 } else { 2057 val = 1.0; 2058 } 2059 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2060 } 2061 } 2062 } 2063 } 2064 /* increment primal counter */ 2065 primal_counter += primal_dofs; 2066 } else { 2067 if (pcbddc->dbg_flag) { 2068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2069 } 2070 } 2071 /* increment constraint counter total_counts */ 2072 total_counts += primal_dofs; 2073 } 2074 2075 /* free workspace */ 2076 if (qr_needed) { 2077 if (pcbddc->dbg_flag) { 2078 ierr = PetscFree(work);CHKERRQ(ierr); 2079 } 2080 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2081 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2082 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2083 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2084 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2085 } 2086 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2087 /* assembling */ 2088 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2089 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 /* 2091 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2092 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2093 */ 2094 } 2095 2096 /* flush dbg viewer */ 2097 if (pcbddc->dbg_flag) { 2098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2099 } 2100 2101 /* free workspace */ 2102 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2103 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2104 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2105 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2106 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2107 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2108 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2109 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2110 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2111 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2117 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2118 { 2119 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2120 PC_IS *pcis = (PC_IS*)pc->data; 2121 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2122 PetscInt bs,ierr,i,vertex_size; 2123 PetscViewer viewer=pcbddc->dbg_viewer; 2124 2125 PetscFunctionBegin; 2126 /* Reset previously computed graph */ 2127 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2128 /* Init local Graph struct */ 2129 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2130 2131 /* Check validity of the csr graph passed in by the user */ 2132 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2133 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2134 } 2135 2136 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2137 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2138 Mat mat_adj; 2139 const PetscInt *xadj,*adjncy; 2140 PetscBool flg_row=PETSC_TRUE; 2141 2142 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2143 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2144 if (!flg_row) { 2145 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2146 } 2147 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2148 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2149 if (!flg_row) { 2150 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2151 } 2152 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2153 } 2154 2155 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2156 vertex_size = 1; 2157 if (!pcbddc->user_provided_isfordofs) { 2158 if (!pcbddc->n_ISForDofs) { 2159 IS *custom_ISForDofs; 2160 2161 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2162 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2163 for (i=0;i<bs;i++) { 2164 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2165 } 2166 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2167 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2168 /* remove my references to IS objects */ 2169 for (i=0;i<bs;i++) { 2170 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2171 } 2172 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2173 } 2174 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2175 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2176 } 2177 2178 /* Setup of Graph */ 2179 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2180 2181 /* Graph's connected components analysis */ 2182 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2183 2184 /* print some info to stdout */ 2185 if (pcbddc->dbg_flag) { 2186 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2187 } 2188 PetscFunctionReturn(0); 2189 } 2190 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2193 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2194 { 2195 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2196 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2197 PetscErrorCode ierr; 2198 2199 PetscFunctionBegin; 2200 n = 0; 2201 vertices = 0; 2202 if (pcbddc->ConstraintMatrix) { 2203 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2204 for (i=0;i<local_primal_size;i++) { 2205 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2206 if (size_of_constraint == 1) n++; 2207 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2208 } 2209 if (vertices_idx) { 2210 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2211 n = 0; 2212 for (i=0;i<local_primal_size;i++) { 2213 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2214 if (size_of_constraint == 1) { 2215 vertices[n++]=row_cmat_indices[0]; 2216 } 2217 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2218 } 2219 } 2220 } 2221 *n_vertices = n; 2222 if (vertices_idx) *vertices_idx = vertices; 2223 PetscFunctionReturn(0); 2224 } 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2228 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2229 { 2230 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2231 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2232 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2233 PetscBT touched; 2234 PetscErrorCode ierr; 2235 2236 /* This function assumes that the number of local constraints per connected component 2237 is not greater than the number of nodes defined for the connected component 2238 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2239 PetscFunctionBegin; 2240 n = 0; 2241 constraints_index = 0; 2242 if (pcbddc->ConstraintMatrix) { 2243 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2244 max_size_of_constraint = 0; 2245 for (i=0;i<local_primal_size;i++) { 2246 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2247 if (size_of_constraint > 1) { 2248 n++; 2249 } 2250 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2251 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2252 } 2253 if (constraints_idx) { 2254 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2255 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2256 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2257 n = 0; 2258 for (i=0;i<local_primal_size;i++) { 2259 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2260 if (size_of_constraint > 1) { 2261 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2262 /* find first untouched local node */ 2263 j = 0; 2264 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2265 min_index = row_cmat_global_indices[j]; 2266 min_loc = j; 2267 /* search the minimum among nodes not yet touched on the connected component 2268 since there can be more than one constraint on a single cc */ 2269 for (j=1;j<size_of_constraint;j++) { 2270 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2271 min_index = row_cmat_global_indices[j]; 2272 min_loc = j; 2273 } 2274 } 2275 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2276 constraints_index[n++] = row_cmat_indices[min_loc]; 2277 } 2278 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2279 } 2280 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2281 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2282 } 2283 } 2284 *n_constraints = n; 2285 if (constraints_idx) *constraints_idx = constraints_index; 2286 PetscFunctionReturn(0); 2287 } 2288 2289 /* the next two functions has been adapted from pcis.c */ 2290 #undef __FUNCT__ 2291 #define __FUNCT__ "PCBDDCApplySchur" 2292 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2293 { 2294 PetscErrorCode ierr; 2295 PC_IS *pcis = (PC_IS*)(pc->data); 2296 2297 PetscFunctionBegin; 2298 if (!vec2_B) { vec2_B = v; } 2299 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2300 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2301 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2302 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2303 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #undef __FUNCT__ 2308 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2309 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2310 { 2311 PetscErrorCode ierr; 2312 PC_IS *pcis = (PC_IS*)(pc->data); 2313 2314 PetscFunctionBegin; 2315 if (!vec2_B) { vec2_B = v; } 2316 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2317 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2318 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2319 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2320 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2321 PetscFunctionReturn(0); 2322 } 2323 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "PCBDDCSubsetNumbering" 2326 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2327 { 2328 Vec local_vec,global_vec; 2329 IS seqis,paris; 2330 VecScatter scatter_ctx; 2331 PetscScalar *array; 2332 PetscInt *temp_global_dofs; 2333 PetscScalar globalsum; 2334 PetscInt i,j,s; 2335 PetscInt nlocals,first_index,old_index,max_local; 2336 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2337 PetscMPIInt *dof_sizes,*dof_displs; 2338 PetscBool first_found; 2339 PetscErrorCode ierr; 2340 2341 PetscFunctionBegin; 2342 /* mpi buffers */ 2343 MPI_Comm_size(comm,&size_prec_comm); 2344 MPI_Comm_rank(comm,&rank_prec_comm); 2345 j = ( !rank_prec_comm ? size_prec_comm : 0); 2346 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2347 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2348 /* get maximum size of subset */ 2349 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2350 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2351 max_local = 0; 2352 if (n_local_dofs) { 2353 max_local = temp_global_dofs[0]; 2354 for (i=1;i<n_local_dofs;i++) { 2355 if (max_local < temp_global_dofs[i] ) { 2356 max_local = temp_global_dofs[i]; 2357 } 2358 } 2359 } 2360 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2361 max_global++; 2362 max_local = 0; 2363 if (n_local_dofs) { 2364 max_local = local_dofs[0]; 2365 for (i=1;i<n_local_dofs;i++) { 2366 if (max_local < local_dofs[i] ) { 2367 max_local = local_dofs[i]; 2368 } 2369 } 2370 } 2371 max_local++; 2372 /* allocate workspace */ 2373 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2374 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2375 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2376 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2377 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2378 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2379 /* create scatter */ 2380 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2381 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2382 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2383 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2384 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2385 /* init array */ 2386 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2387 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2388 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2389 if (local_dofs_mult) { 2390 for (i=0;i<n_local_dofs;i++) { 2391 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2392 } 2393 } else { 2394 for (i=0;i<n_local_dofs;i++) { 2395 array[local_dofs[i]]=1.0; 2396 } 2397 } 2398 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2399 /* scatter into global vec and get total number of global dofs */ 2400 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2401 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2402 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2403 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2404 /* Fill global_vec with cumulative function for global numbering */ 2405 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2406 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2407 nlocals = 0; 2408 first_index = -1; 2409 first_found = PETSC_FALSE; 2410 for (i=0;i<s;i++) { 2411 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2412 first_found = PETSC_TRUE; 2413 first_index = i; 2414 } 2415 nlocals += (PetscInt)PetscRealPart(array[i]); 2416 } 2417 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2418 if (!rank_prec_comm) { 2419 dof_displs[0]=0; 2420 for (i=1;i<size_prec_comm;i++) { 2421 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2422 } 2423 } 2424 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2425 if (first_found) { 2426 array[first_index] += (PetscScalar)nlocals; 2427 old_index = first_index; 2428 for (i=first_index+1;i<s;i++) { 2429 if (PetscRealPart(array[i]) > 0.0) { 2430 array[i] += array[old_index]; 2431 old_index = i; 2432 } 2433 } 2434 } 2435 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2436 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2437 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2438 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2439 /* get global ordering of local dofs */ 2440 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2441 if (local_dofs_mult) { 2442 for (i=0;i<n_local_dofs;i++) { 2443 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2444 } 2445 } else { 2446 for (i=0;i<n_local_dofs;i++) { 2447 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2448 } 2449 } 2450 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2451 /* free workspace */ 2452 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2453 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2454 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2455 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2456 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2457 /* return pointer to global ordering of local dofs */ 2458 *global_numbering_subset = temp_global_dofs; 2459 PetscFunctionReturn(0); 2460 } 2461 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2464 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2465 { 2466 PetscInt i,j; 2467 PetscScalar *alphas; 2468 PetscErrorCode ierr; 2469 2470 PetscFunctionBegin; 2471 /* this implements stabilized Gram-Schmidt */ 2472 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2473 for (i=0;i<n;i++) { 2474 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2475 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2476 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2477 } 2478 ierr = PetscFree(alphas);CHKERRQ(ierr); 2479 PetscFunctionReturn(0); 2480 } 2481 2482 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatConvert_IS_AIJ" 2485 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2486 { 2487 Mat new_mat; 2488 Mat_IS *matis = (Mat_IS*)(mat->data); 2489 /* info on mat */ 2490 PetscInt bs,rows,cols; 2491 PetscInt lrows,lcols; 2492 PetscInt local_rows,local_cols; 2493 PetscMPIInt nsubdomains; 2494 /* preallocation */ 2495 Vec vec_dnz,vec_onz; 2496 PetscScalar *my_dnz,*my_onz,*array; 2497 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2498 PetscInt index_row,index_col,owner; 2499 PetscInt *local_indices,*global_indices; 2500 /* work */ 2501 PetscInt i,j; 2502 PetscErrorCode ierr; 2503 2504 PetscFunctionBegin; 2505 /* CHECKS */ 2506 /* get info from mat */ 2507 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2508 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2509 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2510 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2511 2512 /* MAT_INITIAL_MATRIX */ 2513 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2514 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2515 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2516 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2517 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2518 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2519 2520 /* 2521 preallocation 2522 */ 2523 2524 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2525 /* 2526 Some vectors are needed to sum up properly on shared interface dofs. 2527 Note that preallocation is not exact, since it overestimates nonzeros 2528 */ 2529 /* 2530 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2531 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2532 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2533 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2534 */ 2535 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2536 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2537 /* All processes need to compute entire row ownership */ 2538 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2539 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2540 for (i=0;i<nsubdomains;i++) { 2541 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2542 row_ownership[j]=i; 2543 } 2544 } 2545 /* map indices of local mat to global values */ 2546 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2547 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2548 for (i=0;i<local_rows;i++) local_indices[i]=i; 2549 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2550 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2551 2552 /* 2553 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2554 then, they will be summed up properly. This way, preallocation is always sufficient 2555 */ 2556 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2557 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2558 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2559 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2560 for (i=0;i<local_rows;i++) { 2561 index_row = global_indices[i]; 2562 for (j=i;j<local_rows;j++) { 2563 owner = row_ownership[index_row]; 2564 index_col = global_indices[j]; 2565 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2566 my_dnz[i] += 1.0; 2567 } else { /* offdiag block */ 2568 my_onz[i] += 1.0; 2569 } 2570 /* same as before, interchanging rows and cols */ 2571 if (i != j) { 2572 owner = row_ownership[index_col]; 2573 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2574 my_dnz[j] += 1.0; 2575 } else { 2576 my_onz[j] += 1.0; 2577 } 2578 } 2579 } 2580 } 2581 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2582 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2583 if (local_rows) { /* multilevel guard */ 2584 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2585 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2586 } 2587 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2588 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2589 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2590 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2591 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2592 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2593 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2594 2595 /* set computed preallocation in dnz and onz */ 2596 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2597 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2598 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2599 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2600 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2601 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2602 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2603 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2604 2605 /* Resize preallocation if overestimated */ 2606 for (i=0;i<lrows;i++) { 2607 dnz[i] = PetscMin(dnz[i],lcols); 2608 onz[i] = PetscMin(onz[i],cols-lcols); 2609 } 2610 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2611 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2612 2613 /* 2614 Set values. Very Basic. 2615 */ 2616 for (i=0;i<local_rows;i++) { 2617 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2618 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2619 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2620 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2621 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2622 } 2623 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2624 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2625 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2626 2627 /* get back mat */ 2628 *M = new_mat; 2629 PetscFunctionReturn(0); 2630 } 2631 2632 #undef __FUNCT__ 2633 #define __FUNCT__ "MatISSubassemble_Private" 2634 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2635 { 2636 Mat subdomain_adj; 2637 IS new_ranks,ranks_send_to; 2638 MatPartitioning partitioner; 2639 Mat_IS *matis; 2640 PetscInt n_neighs,*neighs,*n_shared,**shared; 2641 PetscInt prank; 2642 PetscMPIInt size,rank,color; 2643 PetscInt *xadj,*adjncy,*oldranks; 2644 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2645 PetscInt i,j,n_subdomains,local_size,threshold=0; 2646 PetscErrorCode ierr; 2647 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2648 PetscSubcomm subcomm; 2649 2650 PetscFunctionBegin; 2651 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2652 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2653 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2654 2655 /* Get info on mapping */ 2656 matis = (Mat_IS*)(mat->data); 2657 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2658 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2659 2660 /* build local CSR graph of subdomains' connectivity */ 2661 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2662 xadj[0] = 0; 2663 xadj[1] = PetscMax(n_neighs-1,0); 2664 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2665 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2666 2667 if (threshold) { 2668 PetscInt* count,min_threshold; 2669 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2670 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2671 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2672 for (j=0;j<n_shared[i];j++) { 2673 count[shared[i][j]] += 1; 2674 } 2675 } 2676 /* adapt threshold since we dont want to lose significant connections */ 2677 min_threshold = n_neighs; 2678 for (i=1;i<n_neighs;i++) { 2679 for (j=0;j<n_shared[i];j++) { 2680 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2681 } 2682 } 2683 threshold = PetscMax(min_threshold+1,threshold); 2684 xadj[1] = 0; 2685 for (i=1;i<n_neighs;i++) { 2686 for (j=0;j<n_shared[i];j++) { 2687 if (count[shared[i][j]] < threshold) { 2688 adjncy[xadj[1]] = neighs[i]; 2689 adjncy_wgt[xadj[1]] = n_shared[i]; 2690 xadj[1]++; 2691 break; 2692 } 2693 } 2694 } 2695 ierr = PetscFree(count);CHKERRQ(ierr); 2696 } else { 2697 if (xadj[1]) { 2698 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2699 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2700 } 2701 } 2702 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2703 if (use_square) { 2704 for (i=0;i<xadj[1];i++) { 2705 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2706 } 2707 } 2708 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2709 2710 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2711 2712 /* 2713 Restrict work on active processes only. 2714 */ 2715 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2716 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2717 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2718 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2719 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2720 if (color) { 2721 ierr = PetscFree(xadj);CHKERRQ(ierr); 2722 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2723 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2724 } else { 2725 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2726 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2727 prank = rank; 2728 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2729 for (i=0;i<size;i++) { 2730 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2731 } 2732 for (i=0;i<xadj[1];i++) { 2733 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2734 } 2735 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2736 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2737 n_subdomains = (PetscInt)size; 2738 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2739 2740 /* Partition */ 2741 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2742 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2743 if (use_vwgt) { 2744 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2745 v_wgt[0] = local_size; 2746 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2747 } 2748 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2749 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2750 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2751 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2752 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2753 2754 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2755 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2756 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2757 /* clean up */ 2758 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2759 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2760 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2761 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2762 } 2763 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2764 2765 /* assemble parallel IS for sends */ 2766 i = 1; 2767 if (color) i=0; 2768 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2769 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2770 2771 /* get back IS */ 2772 *is_sends = ranks_send_to; 2773 PetscFunctionReturn(0); 2774 } 2775 2776 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2777 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatISSubassemble" 2780 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2781 { 2782 Mat local_mat,new_mat; 2783 Mat_IS *matis; 2784 IS is_sends_internal; 2785 PetscInt rows,cols; 2786 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2787 PetscBool ismatis,isdense; 2788 ISLocalToGlobalMapping l2gmap; 2789 PetscInt* l2gmap_indices; 2790 const PetscInt* is_indices; 2791 MatType new_local_type; 2792 MatTypePrivate new_local_type_private; 2793 /* buffers */ 2794 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2795 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2796 /* MPI */ 2797 MPI_Comm comm; 2798 PetscMPIInt n_sends,n_recvs,commsize; 2799 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2800 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2801 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2802 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2803 PetscErrorCode ierr; 2804 2805 PetscFunctionBegin; 2806 /* checks */ 2807 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2808 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2809 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2810 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2811 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2812 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2813 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2814 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2815 PetscValidLogicalCollectiveInt(mat,bs,0); 2816 /* prepare IS for sending if not provided */ 2817 if (!is_sends) { 2818 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2819 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2820 } else { 2821 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2822 is_sends_internal = is_sends; 2823 } 2824 2825 /* get pointer of MATIS data */ 2826 matis = (Mat_IS*)mat->data; 2827 2828 /* get comm */ 2829 comm = PetscObjectComm((PetscObject)mat); 2830 2831 /* compute number of sends */ 2832 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2833 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2834 2835 /* compute number of receives */ 2836 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2837 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2838 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2839 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2840 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2841 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2842 ierr = PetscFree(iflags);CHKERRQ(ierr); 2843 2844 /* prepare send/receive buffers */ 2845 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2846 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2847 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2848 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2849 2850 /* Get data from local mat */ 2851 if (!isdense) { 2852 /* TODO: See below some guidelines on how to prepare the local buffers */ 2853 /* 2854 send_buffer_vals should contain the raw values of the local matrix 2855 send_buffer_idxs should contain: 2856 - MatType_PRIVATE type 2857 - PetscInt size_of_l2gmap 2858 - PetscInt global_row_indices[size_of_l2gmap] 2859 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2860 */ 2861 } else { 2862 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2863 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2864 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2865 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2866 send_buffer_idxs[1] = i; 2867 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2868 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2869 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2870 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2871 for (i=0;i<n_sends;i++) { 2872 ilengths_vals[is_indices[i]] = len*len; 2873 ilengths_idxs[is_indices[i]] = len+2; 2874 } 2875 } 2876 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2877 buf_size_idxs = 0; 2878 buf_size_vals = 0; 2879 for (i=0;i<n_recvs;i++) { 2880 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2881 buf_size_vals += (PetscInt)olengths_vals[i]; 2882 } 2883 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2884 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2885 2886 /* get new tags for clean communications */ 2887 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2888 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2889 2890 /* allocate for requests */ 2891 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2892 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2893 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2894 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2895 2896 /* communications */ 2897 ptr_idxs = recv_buffer_idxs; 2898 ptr_vals = recv_buffer_vals; 2899 for (i=0;i<n_recvs;i++) { 2900 source_dest = onodes[i]; 2901 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2902 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2903 ptr_idxs += olengths_idxs[i]; 2904 ptr_vals += olengths_vals[i]; 2905 } 2906 for (i=0;i<n_sends;i++) { 2907 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2908 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2909 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2910 } 2911 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2912 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2913 2914 /* assemble new l2g map */ 2915 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2916 ptr_idxs = recv_buffer_idxs; 2917 buf_size_idxs = 0; 2918 for (i=0;i<n_recvs;i++) { 2919 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2920 ptr_idxs += olengths_idxs[i]; 2921 } 2922 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2923 ptr_idxs = recv_buffer_idxs; 2924 buf_size_idxs = 0; 2925 for (i=0;i<n_recvs;i++) { 2926 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2927 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2928 ptr_idxs += olengths_idxs[i]; 2929 } 2930 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2931 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2932 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2933 2934 /* infer new local matrix type from received local matrices type */ 2935 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2936 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2937 new_local_type_private = MATAIJ_PRIVATE; 2938 new_local_type = MATSEQAIJ; 2939 if (n_recvs) { 2940 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2941 ptr_idxs = recv_buffer_idxs; 2942 for (i=0;i<n_recvs;i++) { 2943 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2944 new_local_type_private = MATAIJ_PRIVATE; 2945 break; 2946 } 2947 ptr_idxs += olengths_idxs[i]; 2948 } 2949 switch (new_local_type_private) { 2950 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2951 new_local_type = MATSEQAIJ; 2952 bs = 1; 2953 break; 2954 case MATAIJ_PRIVATE: 2955 new_local_type = MATSEQAIJ; 2956 bs = 1; 2957 break; 2958 case MATBAIJ_PRIVATE: 2959 new_local_type = MATSEQBAIJ; 2960 break; 2961 case MATSBAIJ_PRIVATE: 2962 new_local_type = MATSEQSBAIJ; 2963 break; 2964 default: 2965 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2966 break; 2967 } 2968 } 2969 2970 /* create MATIS object */ 2971 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2972 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2973 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2974 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2975 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2976 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2977 2978 /* set values */ 2979 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2980 ptr_vals = recv_buffer_vals; 2981 ptr_idxs = recv_buffer_idxs; 2982 for (i=0;i<n_recvs;i++) { 2983 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2984 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2985 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2986 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2987 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2988 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2989 } 2990 ptr_idxs += olengths_idxs[i]; 2991 ptr_vals += olengths_vals[i]; 2992 } 2993 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2994 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2995 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2996 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2997 2998 { /* check */ 2999 Vec lvec,rvec; 3000 PetscReal infty_error; 3001 3002 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3003 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3004 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3005 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3006 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 3007 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3008 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3009 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3010 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3011 } 3012 3013 /* free workspace */ 3014 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3015 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3016 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3017 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3018 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3019 if (isdense) { 3020 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3021 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3022 } else { 3023 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3024 } 3025 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3026 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3027 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3028 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3029 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3030 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3031 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3032 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3033 ierr = PetscFree(onodes);CHKERRQ(ierr); 3034 /* get back new mat */ 3035 *mat_n = new_mat; 3036 PetscFunctionReturn(0); 3037 } 3038 3039 #undef __FUNCT__ 3040 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3041 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3042 { 3043 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3044 PC_IS *pcis = (PC_IS*)pc->data; 3045 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3046 MatNullSpace CoarseNullSpace=NULL; 3047 ISLocalToGlobalMapping coarse_islg; 3048 IS coarse_is; 3049 PetscInt max_it; 3050 PetscInt im_active=-1,active_procs=-1; 3051 PC pc_temp; 3052 PCType coarse_pc_type; 3053 KSPType coarse_ksp_type; 3054 PetscBool multilevel_requested,multilevel_allowed; 3055 PetscBool setsym,issym,isbddc,isnn; 3056 MatStructure matstruct; 3057 PetscErrorCode ierr; 3058 3059 PetscFunctionBegin; 3060 /* Assign global numbering to coarse dofs */ 3061 if (pcbddc->new_primal_space) { /* this is suboptimal -> we can chech when creating local primal indices */ 3062 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3063 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3064 /* see if we can avoid some work */ 3065 if (pcbddc->coarse_ksp) { /* already present */ 3066 Mat tcoarse_mat; 3067 PetscInt ocoarse_size; 3068 ierr = KSPGetOperators(pcbddc->coarse_ksp,NULL,&tcoarse_mat,NULL);CHKERRQ(ierr); 3069 ierr = MatGetSize(tcoarse_mat,&ocoarse_size,NULL);CHKERRQ(ierr); 3070 if (ocoarse_size != pcbddc->coarse_size) { /* we need to destroy the KSP */ 3071 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3072 } 3073 } 3074 } 3075 3076 /* infer some info from user */ 3077 issym = PETSC_FALSE; 3078 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3079 multilevel_allowed = PETSC_FALSE; 3080 multilevel_requested = PETSC_FALSE; 3081 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3082 if (multilevel_requested) { 3083 /* count "active processes" */ 3084 im_active = !!(pcis->n); 3085 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3086 if (active_procs/pcbddc->coarsening_ratio < 2) { 3087 multilevel_allowed = PETSC_FALSE; 3088 } else { 3089 multilevel_allowed = PETSC_TRUE; 3090 } 3091 } 3092 3093 /* set defaults for coarse KSP and PC */ 3094 if (multilevel_allowed) { 3095 if (issym) { 3096 coarse_ksp_type = KSPRICHARDSON; 3097 } else { 3098 coarse_ksp_type = KSPCHEBYSHEV; 3099 } 3100 coarse_pc_type = PCBDDC; 3101 } else { 3102 coarse_ksp_type = KSPPREONLY; 3103 coarse_pc_type = PCREDUNDANT; 3104 } 3105 3106 /* create the coarse KSP object only once with defaults */ 3107 if (!pcbddc->coarse_ksp) { 3108 char prefix[256],str_level[3]; 3109 size_t len; 3110 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3111 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3112 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3113 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3114 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3115 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3116 /* prefix */ 3117 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3118 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3119 if (!pcbddc->current_level) { 3120 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3121 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3122 } else { 3123 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3124 if (pcbddc->current_level>1) len -= 2; 3125 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3126 *(prefix+len)='\0'; 3127 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3128 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3129 } 3130 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3131 } 3132 /* allow user customization */ 3133 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3134 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3135 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3136 3137 /* get some info after set from options */ 3138 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3139 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3140 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3141 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3142 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3143 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3144 isbddc = PETSC_FALSE; 3145 } 3146 3147 /* propagate BDDC info to the next level */ 3148 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3149 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3150 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3151 3152 /* creates temporary MATIS object for coarse matrix */ 3153 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3154 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3156 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3157 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3158 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3159 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3160 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3161 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3162 3163 /* assemble coarse matrix */ 3164 if (isbddc || isnn) { 3165 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3166 } else { /* need to provide reuse */ 3167 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3168 } 3169 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3170 3171 /* create local to global scatters for coarse problem */ 3172 if (pcbddc->new_primal_space) { 3173 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3174 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3175 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3176 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3177 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3178 } 3179 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3180 3181 /* propagate symmetry info to coarse matrix */ 3182 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3183 3184 /* Compute coarse null space (special handling by BDDC only) */ 3185 if (pcbddc->NullSpace) { 3186 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3187 if (isbddc) { 3188 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3189 } else { 3190 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3191 } 3192 } 3193 3194 /* set operators */ 3195 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3196 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3197 3198 /* additional KSP customization */ 3199 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3200 if (max_it < 5) { 3201 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3202 } 3203 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3204 3205 3206 /* print some info if requested */ 3207 if (pcbddc->dbg_flag) { 3208 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3209 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3210 if (!multilevel_allowed) { 3211 if (multilevel_requested) { 3212 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3213 } else if (pcbddc->max_levels) { 3214 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3215 } 3216 } 3217 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3218 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3219 } 3220 3221 /* setup coarse ksp */ 3222 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3223 if (pcbddc->dbg_flag) { 3224 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3225 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3226 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3227 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3228 } 3229 3230 /* Check coarse problem if requested */ 3231 if (pcbddc->dbg_flag) { 3232 KSP check_ksp; 3233 KSPType check_ksp_type; 3234 PC check_pc; 3235 Vec check_vec; 3236 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3237 PetscInt its; 3238 PetscBool ispreonly,compute; 3239 3240 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3241 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3242 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3243 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3244 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3245 if (ispreonly) { 3246 check_ksp_type = KSPPREONLY; 3247 compute = PETSC_FALSE; 3248 } else { 3249 if (issym) check_ksp_type = KSPCG; 3250 else check_ksp_type = KSPGMRES; 3251 compute = PETSC_TRUE; 3252 } 3253 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3254 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3255 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3256 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3257 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3258 /* create random vec */ 3259 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3260 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3261 if (CoarseNullSpace) { 3262 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3263 } 3264 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3265 /* solve coarse problem */ 3266 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3267 if (CoarseNullSpace) { 3268 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3269 } 3270 /* check coarse problem residual error */ 3271 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3272 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3273 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3274 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3275 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3277 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3278 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3279 /* get eigenvalue estimation if preonly has not been requested */ 3280 if (!ispreonly) { 3281 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3282 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3283 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3284 } 3285 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3286 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3287 } 3288 /* free memory */ 3289 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3290 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3291 PetscFunctionReturn(0); 3292 } 3293 3294 #undef __FUNCT__ 3295 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3296 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3297 { 3298 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3299 PC_IS* pcis = (PC_IS*)pc->data; 3300 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3301 PetscInt i,j,coarse_size; 3302 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3303 PetscErrorCode ierr; 3304 3305 PetscFunctionBegin; 3306 /* get indices in local ordering for vertices and constraints */ 3307 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3308 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3309 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3310 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3311 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3312 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3313 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3314 3315 /* Compute global number of coarse dofs */ 3316 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3317 3318 /* check numbering */ 3319 if (pcbddc->dbg_flag) { 3320 PetscScalar coarsesum,*array; 3321 3322 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3323 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3324 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3325 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3326 for (i=0;i<pcbddc->local_primal_size;i++) { 3327 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3328 } 3329 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3330 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3331 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3332 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3333 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3334 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3335 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3336 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3337 for (i=0;i<pcis->n;i++) { 3338 if (array[i] == 1.0) { 3339 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3340 } 3341 } 3342 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3343 for (i=0;i<pcis->n;i++) { 3344 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3345 } 3346 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3347 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3348 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3349 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3350 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3351 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3352 if (pcbddc->dbg_flag > 1) { 3353 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3354 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3355 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3356 for (i=0;i<pcbddc->local_primal_size;i++) { 3357 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3358 } 3359 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3360 } 3361 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3362 } 3363 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3364 /* get back data */ 3365 *coarse_size_n = coarse_size; 3366 *local_primal_indices_n = local_primal_indices; 3367 PetscFunctionReturn(0); 3368 } 3369 3370