1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 23 24 /* Change global null space passed in by the user if change of basis has been requested */ 25 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 26 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 27 } 28 29 /* 30 Setup local correction and local part of coarse basis. 31 Gives back the dense local part of the coarse matrix in column major ordering 32 */ 33 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 34 35 /* Compute total number of coarse nodes and setup coarse solver */ 36 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 37 38 /* free */ 39 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 40 PetscFunctionReturn(0); 41 } 42 43 #undef __FUNCT__ 44 #define __FUNCT__ "PCBDDCResetCustomization" 45 PetscErrorCode PCBDDCResetCustomization(PC pc) 46 { 47 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 48 PetscInt i; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 57 for (i=0;i<pcbddc->n_ISForDofs;i++) { 58 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 59 } 60 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 61 pcbddc->n_ISForDofs = 0; 62 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 63 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 PetscFunctionReturn(0); 111 } 112 113 #undef __FUNCT__ 114 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 115 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 116 { 117 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 118 PC_IS *pcis = (PC_IS*)pc->data; 119 VecType impVecType; 120 PetscInt n_vertices,n_constraints,local_primal_size,n_R,old_size; 121 PetscErrorCode ierr; 122 123 PetscFunctionBegin; 124 if (!pcbddc->ConstraintMatrix) { 125 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 126 } 127 /* get sizes */ 128 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,NULL);CHKERRQ(ierr); 129 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 130 n_constraints = local_primal_size - n_vertices; 131 n_R = pcis->n-n_vertices; 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 /* local work vectors (try to avoid unneeded work)*/ 134 /* R nodes */ 135 old_size = -1; 136 if (pcbddc->vec1_R) { 137 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 138 } 139 if (n_R != old_size) { 140 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 141 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 145 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 146 } 147 /* local primal dofs */ 148 old_size = -1; 149 if (pcbddc->vec1_P) { 150 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 151 } 152 if (local_primal_size != old_size) { 153 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 156 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 157 } 158 /* local explicit constraints */ 159 old_size = -1; 160 if (pcbddc->vec1_C) { 161 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 162 } 163 if (n_constraints && n_constraints != old_size) { 164 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 167 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCBDDCSetUpCorrection" 174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 175 { 176 PetscErrorCode ierr; 177 /* pointers to pcis and pcbddc */ 178 PC_IS* pcis = (PC_IS*)pc->data; 179 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 180 /* submatrices of local problem */ 181 Mat A_RV,A_VR,A_VV; 182 /* working matrices */ 183 Mat M1,M2,M3,C_CR; 184 /* working vectors */ 185 Vec vec1_C,vec2_C,vec1_V,vec2_V; 186 /* additional working stuff */ 187 IS is_aux; 188 ISLocalToGlobalMapping BtoNmap; 189 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 190 const PetscScalar *array,*row_cmat_values; 191 const PetscInt *row_cmat_indices,*idx_R_local; 192 PetscInt *vertices,*idx_V_B,*auxindices; 193 PetscInt n_vertices,n_constraints,size_of_constraint; 194 PetscInt i,j,n_R,n_D,n_B; 195 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 196 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 197 MatType impMatType; 198 /* some shortcuts to scalars */ 199 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 200 /* for debugging purposes */ 201 PetscReal *coarsefunctions_errors,*constraints_errors; 202 203 PetscFunctionBegin; 204 /* get number of vertices and their local indices */ 205 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 206 n_constraints = pcbddc->local_primal_size-n_vertices; 207 /* Set Non-overlapping dimensions */ 208 n_B = pcis->n_B; n_D = pcis->n - n_B; 209 n_R = pcis->n-n_vertices; 210 211 /* Set types for local objects needed by BDDC precondtioner */ 212 impMatType = MATSEQDENSE; 213 214 /* Allocating some extra storage just to be safe */ 215 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 216 for (i=0;i<pcis->n;i++) auxindices[i]=i; 217 218 /* vertices in boundary numbering */ 219 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 220 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 221 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 222 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 223 if (i != n_vertices) { 224 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 225 } 226 227 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 228 if (n_constraints) { 229 /* see if we can save some allocations */ 230 if (pcbddc->local_auxmat2) { 231 PetscInt on_R,on_constraints; 232 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 233 if (on_R != n_R || on_constraints != n_constraints) { 234 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 235 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 236 } 237 } 238 /* work vectors */ 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 240 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 241 /* auxiliary matrices */ 242 if (!pcbddc->local_auxmat2) { 243 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 244 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 245 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 246 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 247 } 248 249 /* Extract constraints on R nodes: C_{CR} */ 250 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 251 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 252 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 253 254 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 255 for (i=0;i<n_constraints;i++) { 256 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 257 /* Get row of constraint matrix in R numbering */ 258 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 259 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 260 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 261 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 262 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 263 /* Solve for row of constraint matrix in R numbering */ 264 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 265 /* Set values in local_auxmat2 */ 266 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 267 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 268 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 269 } 270 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 272 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 273 274 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 275 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 276 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 277 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 278 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 279 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 280 ierr = MatSetUp(M1);CHKERRQ(ierr); 281 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 282 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 283 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 284 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 285 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 286 ierr = MatDestroy(&M2);CHKERRQ(ierr); 287 ierr = MatDestroy(&M3);CHKERRQ(ierr); 288 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 289 if (!pcbddc->local_auxmat1) { 290 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 291 } else { 292 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 293 } 294 } 295 296 /* Get submatrices from subdomain matrix */ 297 if (n_vertices) { 298 PetscInt ibs,mbs; 299 PetscBool issbaij; 300 Mat newmat; 301 302 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 303 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 304 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 305 if (ibs != mbs) { /* need to convert to SEQAIJ */ 306 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 309 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 310 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 311 } else { 312 /* this is safe */ 313 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 314 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 315 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 316 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 317 /* which of the two approaches is faster? */ 318 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 319 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 320 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 321 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 322 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 323 } else { 324 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 325 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 326 } 327 } 328 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 329 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 330 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 331 } 332 333 /* Matrix of coarse basis functions (local) */ 334 if (pcbddc->coarse_phi_B) { 335 PetscInt on_B,on_primal; 336 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 337 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 338 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 339 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 340 } 341 } 342 if (pcbddc->coarse_phi_D) { 343 PetscInt on_D,on_primal; 344 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 345 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 346 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 347 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 348 } 349 } 350 if (!pcbddc->coarse_phi_B) { 351 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 352 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 353 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 354 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 355 } 356 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 357 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 358 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 359 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 360 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 361 } 362 363 if (pcbddc->dbg_flag) { 364 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 365 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 366 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 367 } 368 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 369 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 370 371 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 372 373 /* vertices */ 374 for (i=0;i<n_vertices;i++) { 375 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 376 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 377 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 378 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 379 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 380 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 381 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 382 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 384 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 385 if (n_constraints) { 386 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 387 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 388 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 389 } 390 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 391 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 392 393 /* Set values in coarse basis function and subdomain part of coarse_mat */ 394 /* coarse basis functions */ 395 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 396 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 398 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 400 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 401 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 402 if (pcbddc->switch_static || pcbddc->dbg_flag) { 403 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 405 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 406 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 407 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 } 409 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 410 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 413 if (n_constraints) { 414 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 417 } 418 419 /* check */ 420 if (pcbddc->dbg_flag) { 421 /* assemble subdomain vector on local nodes */ 422 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 423 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 426 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 427 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 428 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 429 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 430 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 431 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 433 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 434 if (n_constraints) { 435 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 437 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 438 } 439 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 441 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 442 /* check saddle point solution */ 443 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 444 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 445 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 446 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 447 /* shift by the identity matrix */ 448 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 449 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 452 } 453 } 454 455 /* constraints */ 456 for (i=0;i<n_constraints;i++) { 457 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 458 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 459 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 460 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 461 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 462 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 463 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 464 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 465 if (n_vertices) { 466 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 467 } 468 /* Set values in coarse basis function and subdomain part of coarse_mat */ 469 /* coarse basis functions */ 470 j = i+n_vertices; /* don't touch this! */ 471 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 472 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 474 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 475 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 476 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 477 if (pcbddc->switch_static || pcbddc->dbg_flag) { 478 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 481 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 482 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 483 } 484 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 485 if (n_vertices) { 486 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 487 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 488 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 489 } 490 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 491 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 492 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 493 494 if (pcbddc->dbg_flag) { 495 /* assemble subdomain vector on nodes */ 496 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 497 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 498 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 499 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 500 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 501 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 502 /* assemble subdomain vector of lagrange multipliers */ 503 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 504 if (n_vertices) { 505 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 506 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 507 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 508 } 509 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 510 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 511 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 512 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 513 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 514 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 515 /* check saddle point solution */ 516 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 517 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 518 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 519 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 520 /* shift by the identity matrix */ 521 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 522 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 523 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 524 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 525 } 526 } 527 /* call assembling routines for local coarse basis */ 528 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 if (pcbddc->switch_static || pcbddc->dbg_flag) { 531 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 533 } 534 535 /* compute other basis functions for non-symmetric problems */ 536 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 537 if (!setsym || (setsym && !issym)) { 538 if (!pcbddc->coarse_psi_B) { 539 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 540 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 541 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 542 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 543 } 544 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 545 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 546 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 547 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 548 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 549 } 550 for (i=0;i<pcbddc->local_primal_size;i++) { 551 if (n_constraints) { 552 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 553 for (j=0;j<n_constraints;j++) { 554 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 555 } 556 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 557 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 558 } 559 if (i<n_vertices) { 560 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 561 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 562 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 563 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 564 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 565 if (n_constraints) { 566 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 567 } 568 } else { 569 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 570 } 571 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 572 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 573 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 574 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 575 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 576 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 577 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 578 if (i<n_vertices) { 579 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 580 } 581 if (pcbddc->switch_static || pcbddc->dbg_flag) { 582 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 583 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 584 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 585 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 586 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 587 } 588 589 if (pcbddc->dbg_flag) { 590 /* assemble subdomain vector on nodes */ 591 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 592 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 593 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 594 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 595 if (i<n_vertices) { 596 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 597 } 598 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 599 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 600 /* assemble subdomain vector of lagrange multipliers */ 601 for (j=0;j<pcbddc->local_primal_size;j++) { 602 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 603 } 604 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 605 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 606 /* check saddle point solution */ 607 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 608 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 609 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 610 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 611 /* shift by the identity matrix */ 612 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 613 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 614 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 615 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 616 } 617 } 618 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 if (pcbddc->switch_static || pcbddc->dbg_flag) { 621 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 623 } 624 } 625 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 626 /* Checking coarse_sub_mat and coarse basis functios */ 627 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 628 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 629 if (pcbddc->dbg_flag) { 630 Mat coarse_sub_mat; 631 Mat AUXMAT,TM1,TM2,TM3,TM4; 632 Mat coarse_phi_D,coarse_phi_B; 633 Mat coarse_psi_D,coarse_psi_B; 634 Mat A_II,A_BB,A_IB,A_BI; 635 MatType checkmattype=MATSEQAIJ; 636 PetscReal real_value; 637 638 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 639 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 640 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 641 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 642 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 643 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 644 if (pcbddc->coarse_psi_B) { 645 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 646 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 647 } 648 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 649 650 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 651 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 652 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 653 if (pcbddc->coarse_psi_B) { 654 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 655 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 656 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 657 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 658 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 659 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 660 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 661 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 662 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 663 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 664 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 665 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 666 } else { 667 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 668 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 669 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 670 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 672 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 673 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 674 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 675 } 676 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 677 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 678 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 679 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 680 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 681 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 685 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 686 for (i=0;i<pcbddc->local_primal_size;i++) { 687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 688 } 689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 690 for (i=0;i<pcbddc->local_primal_size;i++) { 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 692 } 693 if (pcbddc->coarse_psi_B) { 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 695 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 697 } 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 699 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 701 } 702 } 703 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 704 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 705 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 706 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 707 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 708 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 709 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 710 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 711 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 712 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 713 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 714 if (pcbddc->coarse_psi_B) { 715 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 716 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 717 } 718 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 719 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 720 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 721 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 722 } 723 /* free memory */ 724 if (n_vertices) { 725 ierr = PetscFree(vertices);CHKERRQ(ierr); 726 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 727 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 730 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 731 } 732 if (n_constraints) { 733 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 734 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 735 ierr = MatDestroy(&M1);CHKERRQ(ierr); 736 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 737 } 738 ierr = PetscFree(auxindices);CHKERRQ(ierr); 739 /* get back data */ 740 *coarse_submat_vals_n = coarse_submat_vals; 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 746 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 747 { 748 PC_IS* pcis = (PC_IS*)(pc->data); 749 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 750 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 751 PetscBool issbaij,isbaij; 752 /* manage repeated solves */ 753 MatReuse reuse; 754 MatStructure matstruct; 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 759 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 760 } 761 /* get mat flags */ 762 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 763 reuse = MAT_INITIAL_MATRIX; 764 if (pc->setupcalled) { 765 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 766 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 767 if (matstruct == SAME_NONZERO_PATTERN) { 768 reuse = MAT_REUSE_MATRIX; 769 } else { 770 reuse = MAT_INITIAL_MATRIX; 771 } 772 } 773 if (reuse == MAT_INITIAL_MATRIX) { 774 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 775 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 776 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 777 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 778 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 779 } 780 781 /* transform local matrices if needed */ 782 if (pcbddc->use_change_of_basis) { 783 Mat change_mat_all; 784 PetscScalar *row_cmat_values; 785 PetscInt *row_cmat_indices; 786 PetscInt *nnz,*is_indices,*temp_indices; 787 PetscInt i,j,k,n_D,n_B; 788 789 /* Get Non-overlapping dimensions */ 790 n_B = pcis->n_B; 791 n_D = pcis->n-n_B; 792 793 /* compute nonzero structure of change of basis on all local nodes */ 794 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 795 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 796 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 797 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 798 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 799 k=1; 800 for (i=0;i<n_B;i++) { 801 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 802 nnz[is_indices[i]]=j; 803 if (k < j) k = j; 804 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 805 } 806 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 807 /* assemble change of basis matrix on the whole set of local dofs */ 808 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 809 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 810 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 811 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 812 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 813 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 814 for (i=0;i<n_D;i++) { 815 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 816 } 817 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 818 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 819 for (i=0;i<n_B;i++) { 820 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 821 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 822 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 823 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 824 } 825 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 826 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 827 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 828 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 829 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 830 if (!issbaij && !isbaij) { 831 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 832 } else { 833 Mat work_mat; 834 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 835 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 836 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 837 } 838 /* 839 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 840 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 841 */ 842 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 843 ierr = PetscFree(nnz);CHKERRQ(ierr); 844 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 845 } else { 846 /* without change of basis, the local matrix is unchanged */ 847 if (!pcbddc->local_mat) { 848 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 849 pcbddc->local_mat = matis->A; 850 } 851 } 852 853 /* get submatrices */ 854 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 855 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 856 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 857 if (!issbaij) { 858 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 859 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 860 } else { 861 Mat newmat; 862 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 863 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 864 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 865 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 866 } 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 872 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 873 { 874 PC_IS* pcis = (PC_IS*)(pc->data); 875 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 876 IS is_aux1,is_aux2; 877 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 878 PetscInt n_vertices,i,j,n_R,n_D,n_B; 879 PetscInt vbs,bs; 880 PetscBT bitmask; 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 /* No need to setup local scatters if primal space is unchanged */ 885 if (!pcbddc->new_primal_space) { 886 PetscFunctionReturn(0); 887 } 888 /* destroy old objects */ 889 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 890 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 891 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 892 /* Set Non-overlapping dimensions */ 893 n_B = pcis->n_B; n_D = pcis->n - n_B; 894 /* get vertex indices from constraint matrix */ 895 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 896 897 /* create auxiliary bitmask */ 898 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 899 for (i=0;i<n_vertices;i++) { 900 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 901 } 902 903 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 904 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 905 for (i=0, n_R=0; i<pcis->n; i++) { 906 if (!PetscBTLookup(bitmask,i)) { 907 idx_R_local[n_R] = i; 908 n_R++; 909 } 910 } 911 912 /* Block code */ 913 vbs = 1; 914 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 915 if (bs>1 && !(n_vertices%bs)) { 916 PetscBool is_blocked = PETSC_TRUE; 917 PetscInt *vary; 918 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 919 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 920 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 921 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 922 for (i=0; i<n_vertices; i++) { 923 if (vary[i]!=0 && vary[i]!=bs) { 924 is_blocked = PETSC_FALSE; 925 break; 926 } 927 } 928 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 929 vbs = bs; 930 for (i=0;i<n_R/vbs;i++) { 931 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 932 } 933 } 934 ierr = PetscFree(vary);CHKERRQ(ierr); 935 } 936 ierr = PetscFree(vertices);CHKERRQ(ierr); 937 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 938 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 939 940 /* print some info if requested */ 941 if (pcbddc->dbg_flag) { 942 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 944 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 948 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 949 } 950 951 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 952 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 953 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 954 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 955 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 956 for (i=0; i<n_D; i++) { 957 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 958 } 959 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 960 for (i=0, j=0; i<n_R; i++) { 961 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 962 aux_array1[j++] = i; 963 } 964 } 965 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 966 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 967 for (i=0, j=0; i<n_B; i++) { 968 if (!PetscBTLookup(bitmask,is_indices[i])) { 969 aux_array2[j++] = i; 970 } 971 } 972 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 973 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 974 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 975 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 976 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 977 978 if (pcbddc->switch_static || pcbddc->dbg_flag) { 979 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 980 for (i=0, j=0; i<n_R; i++) { 981 if (PetscBTLookup(bitmask,idx_R_local[i])) { 982 aux_array1[j++] = i; 983 } 984 } 985 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 986 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 987 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 988 } 989 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 990 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 995 #undef __FUNCT__ 996 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 997 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 998 { 999 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1000 PC_IS *pcis = (PC_IS*)pc->data; 1001 PC pc_temp; 1002 Mat A_RR; 1003 Vec vec1,vec2,vec3; 1004 MatStructure matstruct; 1005 MatReuse reuse; 1006 PetscScalar m_one = -1.0; 1007 PetscReal value; 1008 PetscInt n_D,n_R,ibs,mbs; 1009 PetscBool use_exact,use_exact_reduced,issbaij; 1010 PetscErrorCode ierr; 1011 /* prefixes stuff */ 1012 char dir_prefix[256],neu_prefix[256],str_level[3]; 1013 size_t len; 1014 1015 PetscFunctionBegin; 1016 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1017 1018 /* compute prefixes */ 1019 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1020 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1021 if (!pcbddc->current_level) { 1022 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1023 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1024 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1025 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1026 } else { 1027 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1028 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1029 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1030 len -= 15; /* remove "pc_bddc_coarse_" */ 1031 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1032 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1033 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1034 *(dir_prefix+len)='\0'; 1035 *(neu_prefix+len)='\0'; 1036 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1037 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1038 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1039 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1040 } 1041 1042 /* DIRICHLET PROBLEM */ 1043 /* Matrix for Dirichlet problem is pcis->A_II */ 1044 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1045 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1046 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1047 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1048 /* default */ 1049 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1050 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1051 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1052 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1053 if (issbaij) { 1054 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1055 } else { 1056 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1057 } 1058 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1059 } 1060 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1061 /* Allow user's customization */ 1062 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1063 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1064 if (!n_D) { 1065 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1066 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1067 } 1068 /* Set Up KSP for Dirichlet problem of BDDC */ 1069 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1070 /* set ksp_D into pcis data */ 1071 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1072 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1073 pcis->ksp_D = pcbddc->ksp_D; 1074 1075 /* NEUMANN PROBLEM */ 1076 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1077 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1078 if (pcbddc->ksp_R) { /* already created ksp */ 1079 PetscInt nn_R; 1080 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1081 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1082 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1083 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1084 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1085 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1086 reuse = MAT_INITIAL_MATRIX; 1087 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1088 if (pcbddc->new_primal_space) { /* we are not sure the matrix will have the same nonzero pattern */ 1089 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1090 reuse = MAT_INITIAL_MATRIX; 1091 } else { /* safe to reuse the matrix */ 1092 reuse = MAT_REUSE_MATRIX; 1093 } 1094 } 1095 /* last check */ 1096 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1097 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1098 reuse = MAT_INITIAL_MATRIX; 1099 } 1100 } else { /* first time, so we need to create the matrix */ 1101 reuse = MAT_INITIAL_MATRIX; 1102 } 1103 /* extract A_RR */ 1104 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1105 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1106 if (ibs != mbs) { 1107 Mat newmat; 1108 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1109 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1110 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1111 } else { 1112 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1113 } 1114 if (!pcbddc->ksp_R) { /* create object if not present */ 1115 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1116 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1117 /* default */ 1118 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1119 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1120 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1121 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1122 if (issbaij) { 1123 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1124 } else { 1125 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1126 } 1127 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1128 } 1129 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1130 /* Allow user's customization */ 1131 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1132 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1133 if (!n_R) { 1134 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1135 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1136 } 1137 /* Set Up KSP for Neumann problem of BDDC */ 1138 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1139 1140 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1141 1142 /* Dirichlet */ 1143 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1144 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1145 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1146 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1147 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1148 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1149 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1150 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1151 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1152 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1153 /* need to be adapted? */ 1154 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1155 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1156 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1157 /* print info */ 1158 if (pcbddc->dbg_flag) { 1159 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1162 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1163 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1164 } 1165 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1166 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1167 } 1168 1169 /* Neumann */ 1170 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1171 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1172 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1173 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1174 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1175 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1176 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1177 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1178 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1179 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1180 /* need to be adapted? */ 1181 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1182 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1183 /* print info */ 1184 if (pcbddc->dbg_flag) { 1185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1186 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1187 } 1188 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1189 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1190 } 1191 /* free Neumann problem's matrix */ 1192 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1198 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1199 { 1200 PetscErrorCode ierr; 1201 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1202 1203 PetscFunctionBegin; 1204 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1205 if (pcbddc->local_auxmat1) { 1206 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1207 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1214 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1215 { 1216 PetscErrorCode ierr; 1217 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1218 PC_IS* pcis = (PC_IS*) (pc->data); 1219 const PetscScalar zero = 0.0; 1220 1221 PetscFunctionBegin; 1222 /* Application of PHI^T (or PSI^T) */ 1223 if (pcbddc->coarse_psi_B) { 1224 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1225 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1226 } else { 1227 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1228 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1229 } 1230 /* Scatter data of coarse_rhs */ 1231 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1232 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 1234 /* Local solution on R nodes */ 1235 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1236 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1237 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1238 if (pcbddc->switch_static) { 1239 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1240 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1241 } 1242 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1243 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1244 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 if (pcbddc->switch_static) { 1247 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1248 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1249 } 1250 1251 /* Coarse solution */ 1252 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1253 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1254 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1255 } 1256 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1257 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 1259 /* Sum contributions from two levels */ 1260 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1261 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1267 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1268 { 1269 PetscErrorCode ierr; 1270 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1271 1272 PetscFunctionBegin; 1273 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1279 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1280 { 1281 PetscErrorCode ierr; 1282 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1283 1284 PetscFunctionBegin; 1285 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 /* uncomment for testing purposes */ 1290 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1293 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1294 { 1295 PetscErrorCode ierr; 1296 PC_IS* pcis = (PC_IS*)(pc->data); 1297 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1298 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1299 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1300 MatType impMatType=MATSEQAIJ; 1301 /* one and zero */ 1302 PetscScalar one=1.0,zero=0.0; 1303 /* space to store constraints and their local indices */ 1304 PetscScalar *temp_quadrature_constraint; 1305 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1306 /* iterators */ 1307 PetscInt i,j,k,total_counts,temp_start_ptr; 1308 /* stuff to store connected components stored in pcbddc->mat_graph */ 1309 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1310 PetscInt n_ISForFaces,n_ISForEdges; 1311 /* near null space stuff */ 1312 MatNullSpace nearnullsp; 1313 const Vec *nearnullvecs; 1314 Vec *localnearnullsp; 1315 PetscBool nnsp_has_cnst; 1316 PetscInt nnsp_size; 1317 PetscScalar *array; 1318 /* BLAS integers */ 1319 PetscBLASInt lwork,lierr; 1320 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1321 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1322 /* LAPACK working arrays for SVD or POD */ 1323 PetscBool skip_lapack; 1324 PetscScalar *work; 1325 PetscReal *singular_vals; 1326 #if defined(PETSC_USE_COMPLEX) 1327 PetscReal *rwork; 1328 #endif 1329 #if defined(PETSC_MISSING_LAPACK_GESVD) 1330 PetscBLASInt Blas_one_2=1; 1331 PetscScalar *temp_basis,*correlation_mat; 1332 #else 1333 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1334 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1335 #endif 1336 /* change of basis */ 1337 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1338 PetscBool boolforchange,qr_needed; 1339 PetscBT touched,change_basis,qr_needed_idx; 1340 /* auxiliary stuff */ 1341 PetscInt *nnz,*is_indices,*local_to_B; 1342 /* some quantities */ 1343 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1344 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1345 1346 1347 PetscFunctionBegin; 1348 /* Destroy Mat objects computed previously */ 1349 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1350 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1351 /* Get index sets for faces, edges and vertices from graph */ 1352 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1353 pcbddc->use_vertices = PETSC_TRUE; 1354 } 1355 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1356 /* HACK: provide functions to set change of basis */ 1357 if (!ISForVertices && pcbddc->NullSpace) { 1358 pcbddc->use_change_of_basis = PETSC_TRUE; 1359 pcbddc->use_change_on_faces = PETSC_FALSE; 1360 } 1361 /* print some info */ 1362 if (pcbddc->dbg_flag) { 1363 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1364 i = 0; 1365 if (ISForVertices) { 1366 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1367 } 1368 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1369 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1370 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1371 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1372 } 1373 /* check if near null space is attached to global mat */ 1374 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1375 if (nearnullsp) { 1376 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1377 /* remove any stored info */ 1378 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1379 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1380 /* store information for BDDC solver reuse */ 1381 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1382 pcbddc->onearnullspace = nearnullsp; 1383 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1384 for (i=0;i<nnsp_size;i++) { 1385 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1386 } 1387 } else { /* if near null space is not provided BDDC uses constants by default */ 1388 nnsp_size = 0; 1389 nnsp_has_cnst = PETSC_TRUE; 1390 } 1391 /* get max number of constraints on a single cc */ 1392 max_constraints = nnsp_size; 1393 if (nnsp_has_cnst) max_constraints++; 1394 1395 /* 1396 Evaluate maximum storage size needed by the procedure 1397 - temp_indices will contain start index of each constraint stored as follows 1398 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1399 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1400 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1401 */ 1402 total_counts = n_ISForFaces+n_ISForEdges; 1403 total_counts *= max_constraints; 1404 n_vertices = 0; 1405 if (ISForVertices) { 1406 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1407 } 1408 total_counts += n_vertices; 1409 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1410 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1411 total_counts = 0; 1412 max_size_of_constraint = 0; 1413 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1414 if (i<n_ISForEdges) { 1415 used_IS = &ISForEdges[i]; 1416 } else { 1417 used_IS = &ISForFaces[i-n_ISForEdges]; 1418 } 1419 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1420 total_counts += j; 1421 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1422 } 1423 total_counts *= max_constraints; 1424 total_counts += n_vertices; 1425 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1426 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1427 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1428 /* local to boundary numbering */ 1429 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1430 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1431 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1432 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1433 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1434 /* get local part of global near null space vectors */ 1435 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1436 for (k=0;k<nnsp_size;k++) { 1437 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1438 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1439 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1440 } 1441 1442 /* whether or not to skip lapack calls */ 1443 skip_lapack = PETSC_TRUE; 1444 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1445 1446 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1447 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1448 PetscScalar temp_work; 1449 #if defined(PETSC_MISSING_LAPACK_GESVD) 1450 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1451 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1452 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1453 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1454 #if defined(PETSC_USE_COMPLEX) 1455 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1456 #endif 1457 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1458 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1459 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1460 lwork = -1; 1461 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1462 #if !defined(PETSC_USE_COMPLEX) 1463 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1464 #else 1465 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1466 #endif 1467 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1468 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1469 #else /* on missing GESVD */ 1470 /* SVD */ 1471 PetscInt max_n,min_n; 1472 max_n = max_size_of_constraint; 1473 min_n = max_constraints; 1474 if (max_size_of_constraint < max_constraints) { 1475 min_n = max_size_of_constraint; 1476 max_n = max_constraints; 1477 } 1478 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1479 #if defined(PETSC_USE_COMPLEX) 1480 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1481 #endif 1482 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1483 lwork = -1; 1484 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1485 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1486 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1487 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1488 #if !defined(PETSC_USE_COMPLEX) 1489 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1490 #else 1491 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1492 #endif 1493 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1494 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1495 #endif /* on missing GESVD */ 1496 /* Allocate optimal workspace */ 1497 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1498 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1499 } 1500 /* Now we can loop on constraining sets */ 1501 total_counts = 0; 1502 temp_indices[0] = 0; 1503 /* vertices */ 1504 if (ISForVertices) { 1505 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1506 if (nnsp_has_cnst) { /* consider all vertices */ 1507 for (i=0;i<n_vertices;i++) { 1508 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1509 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1510 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1511 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1512 total_counts++; 1513 } 1514 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1515 PetscBool used_vertex; 1516 for (i=0;i<n_vertices;i++) { 1517 used_vertex = PETSC_FALSE; 1518 k = 0; 1519 while (!used_vertex && k<nnsp_size) { 1520 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1521 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1522 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1523 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1524 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1525 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1526 total_counts++; 1527 used_vertex = PETSC_TRUE; 1528 } 1529 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1530 k++; 1531 } 1532 } 1533 } 1534 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1535 n_vertices = total_counts; 1536 } 1537 1538 /* edges and faces */ 1539 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1540 if (i<n_ISForEdges) { 1541 used_IS = &ISForEdges[i]; 1542 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1543 } else { 1544 used_IS = &ISForFaces[i-n_ISForEdges]; 1545 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1546 } 1547 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1548 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1549 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1550 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1551 /* change of basis should not be performed on local periodic nodes */ 1552 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1553 if (nnsp_has_cnst) { 1554 PetscScalar quad_value; 1555 temp_constraints++; 1556 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1557 for (j=0;j<size_of_constraint;j++) { 1558 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1559 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1560 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1561 } 1562 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1563 total_counts++; 1564 } 1565 for (k=0;k<nnsp_size;k++) { 1566 PetscReal real_value; 1567 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1568 for (j=0;j<size_of_constraint;j++) { 1569 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1570 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1571 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1572 } 1573 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1574 /* check if array is null on the connected component */ 1575 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1576 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1577 if (real_value > 0.0) { /* keep indices and values */ 1578 temp_constraints++; 1579 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1580 total_counts++; 1581 } 1582 } 1583 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1584 valid_constraints = temp_constraints; 1585 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1586 if (!pcbddc->use_nnsp_true && temp_constraints) { 1587 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1588 1589 #if defined(PETSC_MISSING_LAPACK_GESVD) 1590 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1591 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1592 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1593 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1594 from that computed using LAPACKgesvd 1595 -> This is due to a different computation of eigenvectors in LAPACKheev 1596 -> The quality of the POD-computed basis will be the same */ 1597 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1598 /* Store upper triangular part of correlation matrix */ 1599 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1600 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1601 for (j=0;j<temp_constraints;j++) { 1602 for (k=0;k<j+1;k++) { 1603 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1604 } 1605 } 1606 /* compute eigenvalues and eigenvectors of correlation matrix */ 1607 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1608 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1609 #if !defined(PETSC_USE_COMPLEX) 1610 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1611 #else 1612 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1613 #endif 1614 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1615 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1616 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1617 j=0; 1618 while (j < temp_constraints && singular_vals[j] < tol) j++; 1619 total_counts=total_counts-j; 1620 valid_constraints = temp_constraints-j; 1621 /* scale and copy POD basis into used quadrature memory */ 1622 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1623 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1624 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1625 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1626 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1627 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1628 if (j<temp_constraints) { 1629 PetscInt ii; 1630 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1631 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1632 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1633 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1634 for (k=0;k<temp_constraints-j;k++) { 1635 for (ii=0;ii<size_of_constraint;ii++) { 1636 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1637 } 1638 } 1639 } 1640 #else /* on missing GESVD */ 1641 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1642 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1643 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1644 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1645 #if !defined(PETSC_USE_COMPLEX) 1646 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1647 #else 1648 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1649 #endif 1650 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1651 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1652 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1653 k = temp_constraints; 1654 if (k > size_of_constraint) k = size_of_constraint; 1655 j = 0; 1656 while (j < k && singular_vals[k-j-1] < tol) j++; 1657 total_counts = total_counts-temp_constraints+k-j; 1658 valid_constraints = k-j; 1659 #endif /* on missing GESVD */ 1660 } 1661 /* setting change_of_basis flag is safe now */ 1662 if (boolforchange) { 1663 for (j=0;j<valid_constraints;j++) { 1664 PetscBTSet(change_basis,total_counts-j-1); 1665 } 1666 } 1667 } 1668 /* free index sets of faces, edges and vertices */ 1669 for (i=0;i<n_ISForFaces;i++) { 1670 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1671 } 1672 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1673 for (i=0;i<n_ISForEdges;i++) { 1674 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1675 } 1676 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1677 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1678 1679 /* free workspace */ 1680 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1681 ierr = PetscFree(work);CHKERRQ(ierr); 1682 #if defined(PETSC_USE_COMPLEX) 1683 ierr = PetscFree(rwork);CHKERRQ(ierr); 1684 #endif 1685 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1686 #if defined(PETSC_MISSING_LAPACK_GESVD) 1687 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1688 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1689 #endif 1690 } 1691 for (k=0;k<nnsp_size;k++) { 1692 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1693 } 1694 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1695 1696 /* set quantities in pcbddc data structure */ 1697 /* n_vertices defines the number of subdomain corners in the primal space */ 1698 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1699 pcbddc->local_primal_size = total_counts; 1700 pcbddc->n_vertices = n_vertices; 1701 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1702 1703 /* Create constraint matrix */ 1704 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1705 /* so we need to set it up properly either with or without change of basis */ 1706 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1707 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1708 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1709 /* array to compute a local numbering of constraints : vertices first then constraints */ 1710 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1711 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1712 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1713 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1714 /* auxiliary stuff for basis change */ 1715 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1716 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1717 1718 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1719 total_primal_vertices=0; 1720 for (i=0;i<pcbddc->local_primal_size;i++) { 1721 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1722 if (size_of_constraint == 1) { 1723 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1724 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1725 aux_primal_minloc[total_primal_vertices]=0; 1726 total_primal_vertices++; 1727 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1728 PetscInt min_loc,min_index; 1729 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1730 /* find first untouched local node */ 1731 k = 0; 1732 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1733 min_index = global_indices[k]; 1734 min_loc = k; 1735 /* search the minimum among global nodes already untouched on the cc */ 1736 for (k=1;k<size_of_constraint;k++) { 1737 /* there can be more than one constraint on a single connected component */ 1738 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1739 min_index = global_indices[k]; 1740 min_loc = k; 1741 } 1742 } 1743 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1744 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1745 aux_primal_minloc[total_primal_vertices]=min_loc; 1746 total_primal_vertices++; 1747 } 1748 } 1749 /* determine if a QR strategy is needed for change of basis */ 1750 qr_needed = PETSC_FALSE; 1751 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1752 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1753 if (PetscBTLookup(change_basis,i)) { 1754 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1755 j = 0; 1756 for (k=0;k<size_of_constraint;k++) { 1757 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1758 j++; 1759 } 1760 } 1761 /* found more than one primal dof on the cc */ 1762 if (j > 1) { 1763 PetscBTSet(qr_needed_idx,i); 1764 qr_needed = PETSC_TRUE; 1765 } 1766 } 1767 } 1768 /* free workspace */ 1769 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1770 1771 /* permute indices in order to have a sorted set of vertices */ 1772 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1773 1774 /* nonzero structure of constraint matrix */ 1775 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1776 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1777 j=total_primal_vertices; 1778 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1779 if (!PetscBTLookup(change_basis,i)) { 1780 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1781 j++; 1782 } 1783 } 1784 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1785 ierr = PetscFree(nnz);CHKERRQ(ierr); 1786 /* set values in constraint matrix */ 1787 for (i=0;i<total_primal_vertices;i++) { 1788 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1789 } 1790 total_counts = total_primal_vertices; 1791 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1792 if (!PetscBTLookup(change_basis,i)) { 1793 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1794 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1795 total_counts++; 1796 } 1797 } 1798 /* assembling */ 1799 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1800 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1801 /* 1802 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1803 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1804 */ 1805 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1806 if (pcbddc->use_change_of_basis) { 1807 /* dual and primal dofs on a single cc */ 1808 PetscInt dual_dofs,primal_dofs; 1809 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1810 PetscInt primal_counter; 1811 /* working stuff for GEQRF */ 1812 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1813 PetscBLASInt lqr_work; 1814 /* working stuff for UNGQR */ 1815 PetscScalar *gqr_work,lgqr_work_t; 1816 PetscBLASInt lgqr_work; 1817 /* working stuff for TRTRS */ 1818 PetscScalar *trs_rhs; 1819 PetscBLASInt Blas_NRHS; 1820 /* pointers for values insertion into change of basis matrix */ 1821 PetscInt *start_rows,*start_cols; 1822 PetscScalar *start_vals; 1823 /* working stuff for values insertion */ 1824 PetscBT is_primal; 1825 1826 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1827 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1828 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1829 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1830 /* work arrays */ 1831 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1832 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1833 /* nonzeros per row */ 1834 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1835 if (PetscBTLookup(change_basis,i)) { 1836 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1837 if (PetscBTLookup(qr_needed_idx,i)) { 1838 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1839 } else { 1840 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1841 /* get local primal index on the cc */ 1842 j = 0; 1843 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1844 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1845 } 1846 } 1847 } 1848 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1849 ierr = PetscFree(nnz);CHKERRQ(ierr); 1850 /* Set initial identity in the matrix */ 1851 for (i=0;i<pcis->n_B;i++) { 1852 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1853 } 1854 1855 if (pcbddc->dbg_flag) { 1856 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1857 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1858 } 1859 1860 1861 /* Now we loop on the constraints which need a change of basis */ 1862 /* 1863 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1864 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1865 1866 Basic blocks of change of basis matrix T computed 1867 1868 - Using the following block transformation if there is only a primal dof on the cc 1869 (in the example, primal dof is the last one of the edge in LOCAL ordering 1870 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1871 | 1 0 ... 0 1 | 1872 | 0 1 ... 0 1 | 1873 | ... | 1874 | 0 ... 1 1 | 1875 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1876 1877 - via QR decomposition of constraints otherwise 1878 */ 1879 if (qr_needed) { 1880 /* space to store Q */ 1881 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1882 /* first we issue queries for optimal work */ 1883 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1884 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1885 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1886 lqr_work = -1; 1887 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1888 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1889 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1890 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1891 lgqr_work = -1; 1892 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1893 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1894 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1895 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1896 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1897 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1898 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1899 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1900 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1901 /* array to store scaling factors for reflectors */ 1902 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1903 /* array to store rhs and solution of triangular solver */ 1904 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1905 /* allocating workspace for check */ 1906 if (pcbddc->dbg_flag) { 1907 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1908 } 1909 } 1910 /* array to store whether a node is primal or not */ 1911 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1912 for (i=0;i<total_primal_vertices;i++) { 1913 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1914 } 1915 1916 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1917 /* -> using implicit ordering contained in temp_indices data */ 1918 total_counts = pcbddc->n_vertices; 1919 primal_counter = total_counts; 1920 while (total_counts<pcbddc->local_primal_size) { 1921 primal_dofs = 1; 1922 if (PetscBTLookup(change_basis,total_counts)) { 1923 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1924 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1925 primal_dofs++; 1926 } 1927 /* get constraint info */ 1928 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1929 dual_dofs = size_of_constraint-primal_dofs; 1930 1931 if (pcbddc->dbg_flag) { 1932 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1933 } 1934 1935 if (primal_dofs > 1) { /* QR */ 1936 1937 /* copy quadrature constraints for change of basis check */ 1938 if (pcbddc->dbg_flag) { 1939 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1940 } 1941 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1942 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1943 1944 /* compute QR decomposition of constraints */ 1945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1946 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1947 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1948 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1949 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1950 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1951 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1952 1953 /* explictly compute R^-T */ 1954 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1955 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1956 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1957 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1958 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1959 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1960 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1961 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1962 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1963 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1964 1965 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1966 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1968 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1969 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1970 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1971 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1972 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1973 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1974 1975 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1976 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1977 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1978 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1979 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1980 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1981 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1982 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1983 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1984 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1985 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1986 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1987 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1988 1989 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1990 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1991 /* insert cols for primal dofs */ 1992 for (j=0;j<primal_dofs;j++) { 1993 start_vals = &qr_basis[j*size_of_constraint]; 1994 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1995 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1996 } 1997 /* insert cols for dual dofs */ 1998 for (j=0,k=0;j<dual_dofs;k++) { 1999 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2000 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2001 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2002 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2003 j++; 2004 } 2005 } 2006 2007 /* check change of basis */ 2008 if (pcbddc->dbg_flag) { 2009 PetscInt ii,jj; 2010 PetscBool valid_qr=PETSC_TRUE; 2011 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2012 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2013 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2014 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2015 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2016 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2017 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2018 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2019 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2020 for (jj=0;jj<size_of_constraint;jj++) { 2021 for (ii=0;ii<primal_dofs;ii++) { 2022 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2023 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2024 } 2025 } 2026 if (!valid_qr) { 2027 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2028 for (jj=0;jj<size_of_constraint;jj++) { 2029 for (ii=0;ii<primal_dofs;ii++) { 2030 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2031 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2032 } 2033 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2034 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2035 } 2036 } 2037 } 2038 } else { 2039 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2040 } 2041 } 2042 } else { /* simple transformation block */ 2043 PetscInt row,col; 2044 PetscScalar val; 2045 for (j=0;j<size_of_constraint;j++) { 2046 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2047 if (!PetscBTLookup(is_primal,row)) { 2048 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2049 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2050 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2051 } else { 2052 for (k=0;k<size_of_constraint;k++) { 2053 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2054 if (row != col) { 2055 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2056 } else { 2057 val = 1.0; 2058 } 2059 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2060 } 2061 } 2062 } 2063 } 2064 /* increment primal counter */ 2065 primal_counter += primal_dofs; 2066 } else { 2067 if (pcbddc->dbg_flag) { 2068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2069 } 2070 } 2071 /* increment constraint counter total_counts */ 2072 total_counts += primal_dofs; 2073 } 2074 2075 /* free workspace */ 2076 if (qr_needed) { 2077 if (pcbddc->dbg_flag) { 2078 ierr = PetscFree(work);CHKERRQ(ierr); 2079 } 2080 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2081 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2082 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2083 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2084 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2085 } 2086 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2087 /* assembling */ 2088 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2089 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 /* 2091 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2092 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2093 */ 2094 } 2095 2096 /* flush dbg viewer */ 2097 if (pcbddc->dbg_flag) { 2098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2099 } 2100 2101 /* free workspace */ 2102 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2103 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2104 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2105 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2106 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2107 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2108 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2109 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2110 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2111 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2117 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2118 { 2119 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2120 PC_IS *pcis = (PC_IS*)pc->data; 2121 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2122 PetscInt bs,ierr,i,vertex_size; 2123 PetscViewer viewer=pcbddc->dbg_viewer; 2124 2125 PetscFunctionBegin; 2126 /* Reset previously computed graph */ 2127 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2128 /* Init local Graph struct */ 2129 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2130 2131 /* Check validity of the csr graph passed in by the user */ 2132 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2133 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2134 } 2135 2136 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2137 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2138 Mat mat_adj; 2139 const PetscInt *xadj,*adjncy; 2140 PetscBool flg_row=PETSC_TRUE; 2141 2142 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2143 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2144 if (!flg_row) { 2145 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2146 } 2147 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2148 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2149 if (!flg_row) { 2150 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2151 } 2152 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2153 } 2154 2155 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2156 vertex_size = 1; 2157 if (!pcbddc->user_provided_isfordofs) { 2158 if (!pcbddc->n_ISForDofs) { 2159 IS *custom_ISForDofs; 2160 2161 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2162 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2163 for (i=0;i<bs;i++) { 2164 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2165 } 2166 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2167 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2168 /* remove my references to IS objects */ 2169 for (i=0;i<bs;i++) { 2170 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2171 } 2172 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2173 } 2174 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2175 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2176 } 2177 2178 /* Setup of Graph */ 2179 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2180 2181 /* Graph's connected components analysis */ 2182 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2183 2184 /* print some info to stdout */ 2185 if (pcbddc->dbg_flag) { 2186 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2187 } 2188 2189 /* mark topography has done */ 2190 pcbddc->recompute_topography = PETSC_FALSE; 2191 PetscFunctionReturn(0); 2192 } 2193 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2196 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2197 { 2198 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2199 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 n = 0; 2204 vertices = 0; 2205 if (pcbddc->ConstraintMatrix) { 2206 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2207 for (i=0;i<local_primal_size;i++) { 2208 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2209 if (size_of_constraint == 1) n++; 2210 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2211 } 2212 if (vertices_idx) { 2213 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2214 n = 0; 2215 for (i=0;i<local_primal_size;i++) { 2216 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2217 if (size_of_constraint == 1) { 2218 vertices[n++]=row_cmat_indices[0]; 2219 } 2220 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2221 } 2222 } 2223 } 2224 *n_vertices = n; 2225 if (vertices_idx) *vertices_idx = vertices; 2226 PetscFunctionReturn(0); 2227 } 2228 2229 #undef __FUNCT__ 2230 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2231 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2232 { 2233 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2234 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2235 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2236 PetscBT touched; 2237 PetscErrorCode ierr; 2238 2239 /* This function assumes that the number of local constraints per connected component 2240 is not greater than the number of nodes defined for the connected component 2241 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2242 PetscFunctionBegin; 2243 n = 0; 2244 constraints_index = 0; 2245 if (pcbddc->ConstraintMatrix) { 2246 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2247 max_size_of_constraint = 0; 2248 for (i=0;i<local_primal_size;i++) { 2249 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2250 if (size_of_constraint > 1) { 2251 n++; 2252 } 2253 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2254 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2255 } 2256 if (constraints_idx) { 2257 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2258 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2259 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2260 n = 0; 2261 for (i=0;i<local_primal_size;i++) { 2262 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2263 if (size_of_constraint > 1) { 2264 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2265 /* find first untouched local node */ 2266 j = 0; 2267 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2268 min_index = row_cmat_global_indices[j]; 2269 min_loc = j; 2270 /* search the minimum among nodes not yet touched on the connected component 2271 since there can be more than one constraint on a single cc */ 2272 for (j=1;j<size_of_constraint;j++) { 2273 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2274 min_index = row_cmat_global_indices[j]; 2275 min_loc = j; 2276 } 2277 } 2278 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2279 constraints_index[n++] = row_cmat_indices[min_loc]; 2280 } 2281 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2282 } 2283 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2284 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2285 } 2286 } 2287 *n_constraints = n; 2288 if (constraints_idx) *constraints_idx = constraints_index; 2289 PetscFunctionReturn(0); 2290 } 2291 2292 /* the next two functions has been adapted from pcis.c */ 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "PCBDDCApplySchur" 2295 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2296 { 2297 PetscErrorCode ierr; 2298 PC_IS *pcis = (PC_IS*)(pc->data); 2299 2300 PetscFunctionBegin; 2301 if (!vec2_B) { vec2_B = v; } 2302 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2303 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2304 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2305 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2306 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2307 PetscFunctionReturn(0); 2308 } 2309 2310 #undef __FUNCT__ 2311 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2312 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2313 { 2314 PetscErrorCode ierr; 2315 PC_IS *pcis = (PC_IS*)(pc->data); 2316 2317 PetscFunctionBegin; 2318 if (!vec2_B) { vec2_B = v; } 2319 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2320 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2321 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2322 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2323 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2324 PetscFunctionReturn(0); 2325 } 2326 2327 #undef __FUNCT__ 2328 #define __FUNCT__ "PCBDDCSubsetNumbering" 2329 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2330 { 2331 Vec local_vec,global_vec; 2332 IS seqis,paris; 2333 VecScatter scatter_ctx; 2334 PetscScalar *array; 2335 PetscInt *temp_global_dofs; 2336 PetscScalar globalsum; 2337 PetscInt i,j,s; 2338 PetscInt nlocals,first_index,old_index,max_local; 2339 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2340 PetscMPIInt *dof_sizes,*dof_displs; 2341 PetscBool first_found; 2342 PetscErrorCode ierr; 2343 2344 PetscFunctionBegin; 2345 /* mpi buffers */ 2346 MPI_Comm_size(comm,&size_prec_comm); 2347 MPI_Comm_rank(comm,&rank_prec_comm); 2348 j = ( !rank_prec_comm ? size_prec_comm : 0); 2349 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2350 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2351 /* get maximum size of subset */ 2352 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2353 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2354 max_local = 0; 2355 if (n_local_dofs) { 2356 max_local = temp_global_dofs[0]; 2357 for (i=1;i<n_local_dofs;i++) { 2358 if (max_local < temp_global_dofs[i] ) { 2359 max_local = temp_global_dofs[i]; 2360 } 2361 } 2362 } 2363 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2364 max_global++; 2365 max_local = 0; 2366 if (n_local_dofs) { 2367 max_local = local_dofs[0]; 2368 for (i=1;i<n_local_dofs;i++) { 2369 if (max_local < local_dofs[i] ) { 2370 max_local = local_dofs[i]; 2371 } 2372 } 2373 } 2374 max_local++; 2375 /* allocate workspace */ 2376 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2377 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2378 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2379 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2380 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2381 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2382 /* create scatter */ 2383 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2384 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2385 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2386 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2387 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2388 /* init array */ 2389 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2390 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2391 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2392 if (local_dofs_mult) { 2393 for (i=0;i<n_local_dofs;i++) { 2394 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2395 } 2396 } else { 2397 for (i=0;i<n_local_dofs;i++) { 2398 array[local_dofs[i]]=1.0; 2399 } 2400 } 2401 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2402 /* scatter into global vec and get total number of global dofs */ 2403 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2404 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2405 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2406 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2407 /* Fill global_vec with cumulative function for global numbering */ 2408 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2409 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2410 nlocals = 0; 2411 first_index = -1; 2412 first_found = PETSC_FALSE; 2413 for (i=0;i<s;i++) { 2414 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2415 first_found = PETSC_TRUE; 2416 first_index = i; 2417 } 2418 nlocals += (PetscInt)PetscRealPart(array[i]); 2419 } 2420 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2421 if (!rank_prec_comm) { 2422 dof_displs[0]=0; 2423 for (i=1;i<size_prec_comm;i++) { 2424 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2425 } 2426 } 2427 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2428 if (first_found) { 2429 array[first_index] += (PetscScalar)nlocals; 2430 old_index = first_index; 2431 for (i=first_index+1;i<s;i++) { 2432 if (PetscRealPart(array[i]) > 0.0) { 2433 array[i] += array[old_index]; 2434 old_index = i; 2435 } 2436 } 2437 } 2438 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2439 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2440 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2441 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2442 /* get global ordering of local dofs */ 2443 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2444 if (local_dofs_mult) { 2445 for (i=0;i<n_local_dofs;i++) { 2446 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2447 } 2448 } else { 2449 for (i=0;i<n_local_dofs;i++) { 2450 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2451 } 2452 } 2453 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2454 /* free workspace */ 2455 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2456 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2457 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2458 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2459 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2460 /* return pointer to global ordering of local dofs */ 2461 *global_numbering_subset = temp_global_dofs; 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2467 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2468 { 2469 PetscInt i,j; 2470 PetscScalar *alphas; 2471 PetscErrorCode ierr; 2472 2473 PetscFunctionBegin; 2474 /* this implements stabilized Gram-Schmidt */ 2475 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2476 for (i=0;i<n;i++) { 2477 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2478 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2479 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2480 } 2481 ierr = PetscFree(alphas);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatConvert_IS_AIJ" 2488 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2489 { 2490 Mat new_mat; 2491 Mat_IS *matis = (Mat_IS*)(mat->data); 2492 /* info on mat */ 2493 PetscInt bs,rows,cols; 2494 PetscInt lrows,lcols; 2495 PetscInt local_rows,local_cols; 2496 PetscMPIInt nsubdomains; 2497 /* preallocation */ 2498 Vec vec_dnz,vec_onz; 2499 PetscScalar *my_dnz,*my_onz,*array; 2500 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2501 PetscInt index_row,index_col,owner; 2502 PetscInt *local_indices,*global_indices; 2503 /* work */ 2504 PetscInt i,j; 2505 PetscErrorCode ierr; 2506 2507 PetscFunctionBegin; 2508 /* CHECKS */ 2509 /* get info from mat */ 2510 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2511 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2512 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2513 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2514 2515 /* MAT_INITIAL_MATRIX */ 2516 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2517 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2518 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2519 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2520 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2521 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2522 2523 /* 2524 preallocation 2525 */ 2526 2527 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2528 /* 2529 Some vectors are needed to sum up properly on shared interface dofs. 2530 Note that preallocation is not exact, since it overestimates nonzeros 2531 */ 2532 /* 2533 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2534 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2535 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2536 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2537 */ 2538 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2539 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2540 /* All processes need to compute entire row ownership */ 2541 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2542 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2543 for (i=0;i<nsubdomains;i++) { 2544 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2545 row_ownership[j]=i; 2546 } 2547 } 2548 /* map indices of local mat to global values */ 2549 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2550 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2551 for (i=0;i<local_rows;i++) local_indices[i]=i; 2552 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2553 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2554 2555 /* 2556 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2557 then, they will be summed up properly. This way, preallocation is always sufficient 2558 */ 2559 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2560 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2561 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2562 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2563 for (i=0;i<local_rows;i++) { 2564 index_row = global_indices[i]; 2565 for (j=i;j<local_rows;j++) { 2566 owner = row_ownership[index_row]; 2567 index_col = global_indices[j]; 2568 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2569 my_dnz[i] += 1.0; 2570 } else { /* offdiag block */ 2571 my_onz[i] += 1.0; 2572 } 2573 /* same as before, interchanging rows and cols */ 2574 if (i != j) { 2575 owner = row_ownership[index_col]; 2576 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2577 my_dnz[j] += 1.0; 2578 } else { 2579 my_onz[j] += 1.0; 2580 } 2581 } 2582 } 2583 } 2584 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2585 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2586 if (local_rows) { /* multilevel guard */ 2587 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2588 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2589 } 2590 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2591 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2592 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2593 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2594 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2595 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2596 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2597 2598 /* set computed preallocation in dnz and onz */ 2599 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2600 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2601 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2602 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2603 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2604 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2605 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2606 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2607 2608 /* Resize preallocation if overestimated */ 2609 for (i=0;i<lrows;i++) { 2610 dnz[i] = PetscMin(dnz[i],lcols); 2611 onz[i] = PetscMin(onz[i],cols-lcols); 2612 } 2613 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2614 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2615 2616 /* 2617 Set values. Very Basic. 2618 */ 2619 for (i=0;i<local_rows;i++) { 2620 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2621 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2622 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2623 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2624 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2625 } 2626 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2627 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2628 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2629 2630 /* get back mat */ 2631 *M = new_mat; 2632 PetscFunctionReturn(0); 2633 } 2634 2635 #undef __FUNCT__ 2636 #define __FUNCT__ "MatISSubassemble_Private" 2637 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2638 { 2639 Mat subdomain_adj; 2640 IS new_ranks,ranks_send_to; 2641 MatPartitioning partitioner; 2642 Mat_IS *matis; 2643 PetscInt n_neighs,*neighs,*n_shared,**shared; 2644 PetscInt prank; 2645 PetscMPIInt size,rank,color; 2646 PetscInt *xadj,*adjncy,*oldranks; 2647 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2648 PetscInt i,j,n_subdomains,local_size,threshold=0; 2649 PetscErrorCode ierr; 2650 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2651 PetscSubcomm subcomm; 2652 2653 PetscFunctionBegin; 2654 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2655 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2656 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2657 2658 /* Get info on mapping */ 2659 matis = (Mat_IS*)(mat->data); 2660 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2661 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2662 2663 /* build local CSR graph of subdomains' connectivity */ 2664 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2665 xadj[0] = 0; 2666 xadj[1] = PetscMax(n_neighs-1,0); 2667 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2668 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2669 2670 if (threshold) { 2671 PetscInt* count,min_threshold; 2672 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2673 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2674 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2675 for (j=0;j<n_shared[i];j++) { 2676 count[shared[i][j]] += 1; 2677 } 2678 } 2679 /* adapt threshold since we dont want to lose significant connections */ 2680 min_threshold = n_neighs; 2681 for (i=1;i<n_neighs;i++) { 2682 for (j=0;j<n_shared[i];j++) { 2683 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2684 } 2685 } 2686 threshold = PetscMax(min_threshold+1,threshold); 2687 xadj[1] = 0; 2688 for (i=1;i<n_neighs;i++) { 2689 for (j=0;j<n_shared[i];j++) { 2690 if (count[shared[i][j]] < threshold) { 2691 adjncy[xadj[1]] = neighs[i]; 2692 adjncy_wgt[xadj[1]] = n_shared[i]; 2693 xadj[1]++; 2694 break; 2695 } 2696 } 2697 } 2698 ierr = PetscFree(count);CHKERRQ(ierr); 2699 } else { 2700 if (xadj[1]) { 2701 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2702 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2703 } 2704 } 2705 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2706 if (use_square) { 2707 for (i=0;i<xadj[1];i++) { 2708 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2709 } 2710 } 2711 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2712 2713 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2714 2715 /* 2716 Restrict work on active processes only. 2717 */ 2718 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2719 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2720 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2721 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2722 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2723 if (color) { 2724 ierr = PetscFree(xadj);CHKERRQ(ierr); 2725 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2726 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2727 } else { 2728 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2729 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2730 prank = rank; 2731 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2732 for (i=0;i<size;i++) { 2733 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2734 } 2735 for (i=0;i<xadj[1];i++) { 2736 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2737 } 2738 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2739 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2740 n_subdomains = (PetscInt)size; 2741 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2742 2743 /* Partition */ 2744 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2745 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2746 if (use_vwgt) { 2747 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2748 v_wgt[0] = local_size; 2749 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2750 } 2751 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2752 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2753 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2754 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2755 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2756 2757 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2758 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2759 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2760 /* clean up */ 2761 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2762 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2763 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2764 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2765 } 2766 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2767 2768 /* assemble parallel IS for sends */ 2769 i = 1; 2770 if (color) i=0; 2771 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2772 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2773 2774 /* get back IS */ 2775 *is_sends = ranks_send_to; 2776 PetscFunctionReturn(0); 2777 } 2778 2779 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2780 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatISSubassemble" 2783 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2784 { 2785 Mat local_mat,new_mat; 2786 Mat_IS *matis; 2787 IS is_sends_internal; 2788 PetscInt rows,cols; 2789 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2790 PetscBool ismatis,isdense; 2791 ISLocalToGlobalMapping l2gmap; 2792 PetscInt* l2gmap_indices; 2793 const PetscInt* is_indices; 2794 MatType new_local_type; 2795 MatTypePrivate new_local_type_private; 2796 /* buffers */ 2797 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2798 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2799 /* MPI */ 2800 MPI_Comm comm; 2801 PetscMPIInt n_sends,n_recvs,commsize; 2802 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2803 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2804 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2805 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2806 PetscErrorCode ierr; 2807 2808 PetscFunctionBegin; 2809 /* checks */ 2810 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2811 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2812 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2813 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2814 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2815 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2816 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2817 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2818 PetscValidLogicalCollectiveInt(mat,bs,0); 2819 /* prepare IS for sending if not provided */ 2820 if (!is_sends) { 2821 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2822 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2823 } else { 2824 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2825 is_sends_internal = is_sends; 2826 } 2827 2828 /* get pointer of MATIS data */ 2829 matis = (Mat_IS*)mat->data; 2830 2831 /* get comm */ 2832 comm = PetscObjectComm((PetscObject)mat); 2833 2834 /* compute number of sends */ 2835 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2836 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2837 2838 /* compute number of receives */ 2839 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2840 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2841 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2842 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2843 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2844 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2845 ierr = PetscFree(iflags);CHKERRQ(ierr); 2846 2847 /* prepare send/receive buffers */ 2848 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2849 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2850 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2851 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2852 2853 /* Get data from local mat */ 2854 if (!isdense) { 2855 /* TODO: See below some guidelines on how to prepare the local buffers */ 2856 /* 2857 send_buffer_vals should contain the raw values of the local matrix 2858 send_buffer_idxs should contain: 2859 - MatType_PRIVATE type 2860 - PetscInt size_of_l2gmap 2861 - PetscInt global_row_indices[size_of_l2gmap] 2862 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2863 */ 2864 } else { 2865 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2866 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2867 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2868 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2869 send_buffer_idxs[1] = i; 2870 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2871 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2872 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2873 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2874 for (i=0;i<n_sends;i++) { 2875 ilengths_vals[is_indices[i]] = len*len; 2876 ilengths_idxs[is_indices[i]] = len+2; 2877 } 2878 } 2879 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2880 buf_size_idxs = 0; 2881 buf_size_vals = 0; 2882 for (i=0;i<n_recvs;i++) { 2883 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2884 buf_size_vals += (PetscInt)olengths_vals[i]; 2885 } 2886 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2887 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2888 2889 /* get new tags for clean communications */ 2890 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2891 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2892 2893 /* allocate for requests */ 2894 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2895 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2896 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2897 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2898 2899 /* communications */ 2900 ptr_idxs = recv_buffer_idxs; 2901 ptr_vals = recv_buffer_vals; 2902 for (i=0;i<n_recvs;i++) { 2903 source_dest = onodes[i]; 2904 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2905 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2906 ptr_idxs += olengths_idxs[i]; 2907 ptr_vals += olengths_vals[i]; 2908 } 2909 for (i=0;i<n_sends;i++) { 2910 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2911 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2912 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2913 } 2914 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2915 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2916 2917 /* assemble new l2g map */ 2918 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2919 ptr_idxs = recv_buffer_idxs; 2920 buf_size_idxs = 0; 2921 for (i=0;i<n_recvs;i++) { 2922 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2923 ptr_idxs += olengths_idxs[i]; 2924 } 2925 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2926 ptr_idxs = recv_buffer_idxs; 2927 buf_size_idxs = 0; 2928 for (i=0;i<n_recvs;i++) { 2929 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2930 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2931 ptr_idxs += olengths_idxs[i]; 2932 } 2933 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2934 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2935 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2936 2937 /* infer new local matrix type from received local matrices type */ 2938 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2939 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2940 new_local_type_private = MATAIJ_PRIVATE; 2941 new_local_type = MATSEQAIJ; 2942 if (n_recvs) { 2943 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2944 ptr_idxs = recv_buffer_idxs; 2945 for (i=0;i<n_recvs;i++) { 2946 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2947 new_local_type_private = MATAIJ_PRIVATE; 2948 break; 2949 } 2950 ptr_idxs += olengths_idxs[i]; 2951 } 2952 switch (new_local_type_private) { 2953 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2954 new_local_type = MATSEQAIJ; 2955 bs = 1; 2956 break; 2957 case MATAIJ_PRIVATE: 2958 new_local_type = MATSEQAIJ; 2959 bs = 1; 2960 break; 2961 case MATBAIJ_PRIVATE: 2962 new_local_type = MATSEQBAIJ; 2963 break; 2964 case MATSBAIJ_PRIVATE: 2965 new_local_type = MATSEQSBAIJ; 2966 break; 2967 default: 2968 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2969 break; 2970 } 2971 } 2972 2973 /* create MATIS object */ 2974 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2975 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2976 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2977 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2978 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2979 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2980 2981 /* set values */ 2982 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2983 ptr_vals = recv_buffer_vals; 2984 ptr_idxs = recv_buffer_idxs; 2985 for (i=0;i<n_recvs;i++) { 2986 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2987 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2988 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2989 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2990 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2991 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2992 } 2993 ptr_idxs += olengths_idxs[i]; 2994 ptr_vals += olengths_vals[i]; 2995 } 2996 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2997 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2998 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2999 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3000 3001 { /* check */ 3002 Vec lvec,rvec; 3003 PetscReal infty_error; 3004 3005 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3006 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3007 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3008 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3009 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 3010 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3011 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3012 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3013 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3014 } 3015 3016 /* free workspace */ 3017 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3018 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3019 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3020 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3021 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3022 if (isdense) { 3023 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3024 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3025 } else { 3026 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3027 } 3028 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3029 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3030 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3031 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3032 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3033 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3034 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3035 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3036 ierr = PetscFree(onodes);CHKERRQ(ierr); 3037 /* get back new mat */ 3038 *mat_n = new_mat; 3039 PetscFunctionReturn(0); 3040 } 3041 3042 #undef __FUNCT__ 3043 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3044 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3045 { 3046 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3047 PC_IS *pcis = (PC_IS*)pc->data; 3048 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3049 MatNullSpace CoarseNullSpace=NULL; 3050 ISLocalToGlobalMapping coarse_islg; 3051 IS coarse_is; 3052 PetscInt max_it; 3053 PetscInt im_active=-1,active_procs=-1; 3054 PC pc_temp; 3055 PCType coarse_pc_type; 3056 KSPType coarse_ksp_type; 3057 PetscBool multilevel_requested,multilevel_allowed; 3058 PetscBool setsym,issym,isbddc,isnn; 3059 MatStructure matstruct; 3060 PetscErrorCode ierr; 3061 3062 PetscFunctionBegin; 3063 /* Assign global numbering to coarse dofs */ 3064 if (pcbddc->new_primal_space) { /* this is suboptimal -> we can chech when creating local primal indices */ 3065 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3066 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3067 /* see if we can avoid some work */ 3068 if (pcbddc->coarse_ksp) { /* already present */ 3069 Mat tcoarse_mat; 3070 PetscInt ocoarse_size; 3071 ierr = KSPGetOperators(pcbddc->coarse_ksp,NULL,&tcoarse_mat,NULL);CHKERRQ(ierr); 3072 ierr = MatGetSize(tcoarse_mat,&ocoarse_size,NULL);CHKERRQ(ierr); 3073 if (ocoarse_size != pcbddc->coarse_size) { /* we need to destroy the KSP */ 3074 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3075 } 3076 } 3077 } 3078 3079 /* infer some info from user */ 3080 issym = PETSC_FALSE; 3081 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3082 multilevel_allowed = PETSC_FALSE; 3083 multilevel_requested = PETSC_FALSE; 3084 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3085 if (multilevel_requested) { 3086 /* count "active processes" */ 3087 im_active = !!(pcis->n); 3088 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3089 if (active_procs/pcbddc->coarsening_ratio < 2) { 3090 multilevel_allowed = PETSC_FALSE; 3091 } else { 3092 multilevel_allowed = PETSC_TRUE; 3093 } 3094 } 3095 3096 /* set defaults for coarse KSP and PC */ 3097 if (multilevel_allowed) { 3098 if (issym) { 3099 coarse_ksp_type = KSPRICHARDSON; 3100 } else { 3101 coarse_ksp_type = KSPCHEBYSHEV; 3102 } 3103 coarse_pc_type = PCBDDC; 3104 } else { 3105 coarse_ksp_type = KSPPREONLY; 3106 coarse_pc_type = PCREDUNDANT; 3107 } 3108 3109 /* create the coarse KSP object only once with defaults */ 3110 if (!pcbddc->coarse_ksp) { 3111 char prefix[256],str_level[3]; 3112 size_t len; 3113 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3114 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3115 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3116 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3117 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3118 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3119 /* prefix */ 3120 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3121 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3122 if (!pcbddc->current_level) { 3123 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3124 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3125 } else { 3126 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3127 if (pcbddc->current_level>1) len -= 2; 3128 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3129 *(prefix+len)='\0'; 3130 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3131 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3132 } 3133 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3134 } 3135 /* allow user customization */ 3136 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3137 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3138 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3139 3140 /* get some info after set from options */ 3141 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3142 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3143 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3144 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3145 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3146 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3147 isbddc = PETSC_FALSE; 3148 } 3149 3150 /* propagate BDDC info to the next level */ 3151 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3152 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3153 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3154 3155 /* creates temporary MATIS object for coarse matrix */ 3156 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3157 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3158 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3159 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3160 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3161 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3162 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3163 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3164 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3165 3166 /* assemble coarse matrix */ 3167 if (isbddc || isnn) { 3168 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3169 } else { /* need to provide reuse */ 3170 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3171 } 3172 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3173 3174 /* create local to global scatters for coarse problem */ 3175 if (pcbddc->new_primal_space) { 3176 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3177 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3178 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3179 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3180 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3181 } 3182 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3183 3184 /* propagate symmetry info to coarse matrix */ 3185 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3186 3187 /* Compute coarse null space (special handling by BDDC only) */ 3188 if (pcbddc->NullSpace) { 3189 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3190 if (isbddc) { 3191 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3192 } else { 3193 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3194 } 3195 } 3196 3197 /* set operators */ 3198 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3199 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3200 3201 /* additional KSP customization */ 3202 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3203 if (max_it < 5) { 3204 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3205 } 3206 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3207 3208 3209 /* print some info if requested */ 3210 if (pcbddc->dbg_flag) { 3211 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3212 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3213 if (!multilevel_allowed) { 3214 if (multilevel_requested) { 3215 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3216 } else if (pcbddc->max_levels) { 3217 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3218 } 3219 } 3220 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3221 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3222 } 3223 3224 /* setup coarse ksp */ 3225 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3226 if (pcbddc->dbg_flag) { 3227 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3228 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3229 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3230 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3231 } 3232 3233 /* Check coarse problem if requested */ 3234 if (pcbddc->dbg_flag) { 3235 KSP check_ksp; 3236 KSPType check_ksp_type; 3237 PC check_pc; 3238 Vec check_vec; 3239 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3240 PetscInt its; 3241 PetscBool ispreonly,compute; 3242 3243 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3244 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3245 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3246 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3247 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3248 if (ispreonly) { 3249 check_ksp_type = KSPPREONLY; 3250 compute = PETSC_FALSE; 3251 } else { 3252 if (issym) check_ksp_type = KSPCG; 3253 else check_ksp_type = KSPGMRES; 3254 compute = PETSC_TRUE; 3255 } 3256 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3257 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3258 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3259 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3260 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3261 /* create random vec */ 3262 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3263 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3264 if (CoarseNullSpace) { 3265 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3266 } 3267 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3268 /* solve coarse problem */ 3269 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3270 if (CoarseNullSpace) { 3271 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3272 } 3273 /* check coarse problem residual error */ 3274 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3275 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3276 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3277 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3278 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3279 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3280 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3281 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3282 /* get eigenvalue estimation if preonly has not been requested */ 3283 if (!ispreonly) { 3284 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3285 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3286 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3287 } 3288 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3289 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3290 } 3291 /* free memory */ 3292 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3293 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3294 PetscFunctionReturn(0); 3295 } 3296 3297 #undef __FUNCT__ 3298 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3299 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3300 { 3301 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3302 PC_IS* pcis = (PC_IS*)pc->data; 3303 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3304 PetscInt i,j,coarse_size; 3305 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3306 PetscErrorCode ierr; 3307 3308 PetscFunctionBegin; 3309 /* get indices in local ordering for vertices and constraints */ 3310 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3311 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3312 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3313 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3314 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3315 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3316 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3317 3318 /* Compute global number of coarse dofs */ 3319 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3320 3321 /* check numbering */ 3322 if (pcbddc->dbg_flag) { 3323 PetscScalar coarsesum,*array; 3324 3325 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3326 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3327 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3328 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3329 for (i=0;i<pcbddc->local_primal_size;i++) { 3330 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3331 } 3332 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3333 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3334 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3335 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3336 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3337 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3338 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3339 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3340 for (i=0;i<pcis->n;i++) { 3341 if (array[i] == 1.0) { 3342 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3343 } 3344 } 3345 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3346 for (i=0;i<pcis->n;i++) { 3347 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3348 } 3349 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3350 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3351 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3352 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3353 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3354 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3355 if (pcbddc->dbg_flag > 1) { 3356 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3357 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3358 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3359 for (i=0;i<pcbddc->local_primal_size;i++) { 3360 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3361 } 3362 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3363 } 3364 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3365 } 3366 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3367 /* get back data */ 3368 *coarse_size_n = coarse_size; 3369 *local_primal_indices_n = local_primal_indices; 3370 PetscFunctionReturn(0); 3371 } 3372 3373