1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCSetLevel" 47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 51 PetscFunctionBegin; 52 pcbddc->current_level=level; 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "PCBDDCResetCustomization" 58 PetscErrorCode PCBDDCResetCustomization(PC pc) 59 { 60 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 61 PetscInt i; 62 PetscErrorCode ierr; 63 64 PetscFunctionBegin; 65 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 66 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 67 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 68 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 69 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 70 for (i=0;i<pcbddc->n_ISForDofs;i++) { 71 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 72 } 73 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "PCBDDCResetTopography" 79 PetscErrorCode PCBDDCResetTopography(PC pc) 80 { 81 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 82 PetscErrorCode ierr; 83 84 PetscFunctionBegin; 85 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 86 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 87 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PCBDDCResetSolvers" 93 PetscErrorCode PCBDDCResetSolvers(PC pc) 94 { 95 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 101 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 102 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 103 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 104 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 105 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 106 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 109 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 110 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 111 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 112 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 113 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 114 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 115 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "PCBDDCCreateWorkVectors" 121 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 122 { 123 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 124 PC_IS *pcis = (PC_IS*)pc->data; 125 VecType impVecType; 126 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 131 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 132 local_primal_size = n_constraints+n_vertices; 133 n_R = pcis->n-n_vertices; 134 /* local work vectors */ 135 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 136 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 137 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 138 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 139 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 140 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 141 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 142 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 143 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 144 if (n_constraints) { 145 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 146 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 147 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 148 } 149 PetscFunctionReturn(0); 150 } 151 152 #undef __FUNCT__ 153 #define __FUNCT__ "PCBDDCSetUpCoarseLocal" 154 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n) 155 { 156 PetscErrorCode ierr; 157 /* pointers to pcis and pcbddc */ 158 PC_IS* pcis = (PC_IS*)pc->data; 159 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 160 /* submatrices of local problem */ 161 Mat A_RV,A_VR,A_VV; 162 /* working matrices */ 163 Mat M1,M2,M3,C_CR; 164 /* working vectors */ 165 Vec vec1_C,vec2_C,vec1_V,vec2_V; 166 /* additional working stuff */ 167 IS is_aux; 168 ISLocalToGlobalMapping BtoNmap; 169 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 170 const PetscScalar *array,*row_cmat_values; 171 const PetscInt *row_cmat_indices,*idx_R_local; 172 PetscInt *vertices,*idx_V_B,*auxindices; 173 PetscInt n_vertices,n_constraints,size_of_constraint; 174 PetscInt i,j,n_R,n_D,n_B; 175 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 176 /* Vector and matrix types */ 177 VecType impVecType; 178 MatType impMatType; 179 /* some shortcuts to scalars */ 180 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 181 /* for debugging purposes */ 182 PetscReal *coarsefunctions_errors,*constraints_errors; 183 184 PetscFunctionBegin; 185 /* get number of vertices and their local indices */ 186 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 187 n_constraints = pcbddc->local_primal_size-n_vertices; 188 /* Set Non-overlapping dimensions */ 189 n_B = pcis->n_B; n_D = pcis->n - n_B; 190 n_R = pcis->n-n_vertices; 191 192 /* Set types for local objects needed by BDDC precondtioner */ 193 impMatType = MATSEQDENSE; 194 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 195 196 /* Allocating some extra storage just to be safe */ 197 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 198 for (i=0;i<pcis->n;i++) auxindices[i]=i; 199 200 /* vertices in boundary numbering */ 201 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 202 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 203 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 204 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 205 if (i != n_vertices) { 206 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 207 } 208 209 /* some work vectors on vertices and/or constraints */ 210 if (n_vertices) { 211 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 212 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 213 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 214 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 215 } 216 if (n_constraints) { 217 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 218 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 219 } 220 221 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 222 if (n_constraints) { 223 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 224 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 225 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 226 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 227 228 /* Extract constraints on R nodes: C_{CR} */ 229 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 230 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 231 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 232 233 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 234 for (i=0;i<n_constraints;i++) { 235 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 236 /* Get row of constraint matrix in R numbering */ 237 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 238 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 239 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 240 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 241 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 242 /* Solve for row of constraint matrix in R numbering */ 243 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 244 /* Set values in local_auxmat2 */ 245 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 246 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 247 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 248 } 249 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 250 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 251 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 252 253 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 254 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 255 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 256 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 257 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 258 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 259 ierr = MatSetUp(M1);CHKERRQ(ierr); 260 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 261 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 262 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 263 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 264 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 265 ierr = MatDestroy(&M2);CHKERRQ(ierr); 266 ierr = MatDestroy(&M3);CHKERRQ(ierr); 267 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 268 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 269 } 270 271 /* Get submatrices from subdomain matrix */ 272 if (n_vertices) { 273 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr); 274 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 275 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 276 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 277 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 278 } 279 280 /* Matrix of coarse basis functions (local) */ 281 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 282 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 283 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 284 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 285 if (pcbddc->switch_static || pcbddc->dbg_flag) { 286 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 287 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 288 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 289 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 290 } 291 292 if (pcbddc->dbg_flag) { 293 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 294 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 295 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 296 } 297 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 298 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 299 300 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 301 302 /* vertices */ 303 for (i=0;i<n_vertices;i++) { 304 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 305 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 306 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 307 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 308 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 309 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 310 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 311 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 312 if (n_constraints) { 313 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 314 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 315 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 316 } 317 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 318 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 319 320 /* Set values in coarse basis function and subdomain part of coarse_mat */ 321 /* coarse basis functions */ 322 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 323 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 324 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 325 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 326 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 327 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 328 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 329 if (pcbddc->switch_static || pcbddc->dbg_flag) { 330 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 331 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 332 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 333 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 334 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 335 } 336 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 337 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 338 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 339 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 340 if (n_constraints) { 341 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 342 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 343 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 344 } 345 346 /* check */ 347 if (pcbddc->dbg_flag) { 348 /* assemble subdomain vector on local nodes */ 349 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 350 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 351 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 352 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 353 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 354 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 355 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 356 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 357 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 358 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 359 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 360 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 361 if (n_constraints) { 362 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 363 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 364 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 365 } 366 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 367 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 368 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 369 /* check saddle point solution */ 370 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 371 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 372 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 373 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 374 /* shift by the identity matrix */ 375 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 376 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 378 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 379 } 380 } 381 382 /* constraints */ 383 for (i=0;i<n_constraints;i++) { 384 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 385 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 386 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 387 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 388 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 389 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 390 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 391 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 392 if (n_vertices) { 393 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 394 } 395 /* Set values in coarse basis function and subdomain part of coarse_mat */ 396 /* coarse basis functions */ 397 j = i+n_vertices; /* don't touch this! */ 398 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 399 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 400 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 401 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 402 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 403 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 404 if (pcbddc->switch_static || pcbddc->dbg_flag) { 405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 408 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 410 } 411 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 412 if (n_vertices) { 413 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 414 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 416 } 417 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 418 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 420 421 if (pcbddc->dbg_flag) { 422 /* assemble subdomain vector on nodes */ 423 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 424 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 425 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 427 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 428 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 429 /* assemble subdomain vector of lagrange multipliers */ 430 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 431 if (n_vertices) { 432 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 433 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 435 } 436 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 437 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 438 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 439 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 441 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 442 /* check saddle point solution */ 443 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 444 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 445 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 446 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 447 /* shift by the identity matrix */ 448 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 449 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 452 } 453 } 454 /* call assembling routines for local coarse basis */ 455 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 456 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 457 if (pcbddc->switch_static || pcbddc->dbg_flag) { 458 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 459 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 460 } 461 462 /* compute other basis functions for non-symmetric problems */ 463 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 464 if (!setsym || (setsym && !issym)) { 465 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 466 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 467 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 468 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 469 if (pcbddc->switch_static || pcbddc->dbg_flag) { 470 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 471 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 472 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 473 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 474 } 475 for (i=0;i<pcbddc->local_primal_size;i++) { 476 if (n_constraints) { 477 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 478 for (j=0;j<n_constraints;j++) { 479 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 480 } 481 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 482 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 483 } 484 if (i<n_vertices) { 485 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 486 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 487 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 488 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 489 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 490 if (n_constraints) { 491 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 492 } 493 } else { 494 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 495 } 496 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 497 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 498 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 499 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 500 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 501 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 502 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 503 if (i<n_vertices) { 504 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 505 } 506 if (pcbddc->switch_static || pcbddc->dbg_flag) { 507 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 508 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 509 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 510 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 511 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 512 } 513 514 if (pcbddc->dbg_flag) { 515 /* assemble subdomain vector on nodes */ 516 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 517 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 518 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 519 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 520 if (i<n_vertices) { 521 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 522 } 523 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 524 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 525 /* assemble subdomain vector of lagrange multipliers */ 526 for (j=0;j<pcbddc->local_primal_size;j++) { 527 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 528 } 529 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 530 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 531 /* check saddle point solution */ 532 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 533 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 534 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 535 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 536 /* shift by the identity matrix */ 537 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 538 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 539 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 540 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 541 } 542 } 543 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 544 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 if (pcbddc->switch_static || pcbddc->dbg_flag) { 546 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 547 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 548 } 549 } 550 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 551 /* Checking coarse_sub_mat and coarse basis functios */ 552 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 553 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 554 if (pcbddc->dbg_flag) { 555 Mat coarse_sub_mat; 556 Mat AUXMAT,TM1,TM2,TM3,TM4; 557 Mat coarse_phi_D,coarse_phi_B; 558 Mat coarse_psi_D,coarse_psi_B; 559 Mat A_II,A_BB,A_IB,A_BI; 560 MatType checkmattype=MATSEQAIJ; 561 PetscReal real_value; 562 563 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 564 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 565 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 566 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 567 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 568 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 569 if (pcbddc->coarse_psi_B) { 570 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 571 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 572 } 573 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 574 575 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 576 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 577 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 578 if (pcbddc->coarse_psi_B) { 579 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 580 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 581 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 582 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 583 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 584 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 585 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 586 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 587 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 588 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 589 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 590 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 591 } else { 592 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 593 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 594 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 595 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 596 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 597 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 598 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 599 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 600 } 601 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 602 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 603 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 604 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 605 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 606 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 611 for (i=0;i<pcbddc->local_primal_size;i++) { 612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 613 } 614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 615 for (i=0;i<pcbddc->local_primal_size;i++) { 616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 617 } 618 if (pcbddc->coarse_psi_B) { 619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 620 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 622 } 623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 624 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 626 } 627 } 628 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 629 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 630 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 631 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 632 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 633 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 634 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 635 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 636 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 637 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 638 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 639 if (pcbddc->coarse_psi_B) { 640 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 641 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 642 } 643 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 644 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 645 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 646 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 647 } 648 /* free memory */ 649 if (n_vertices) { 650 ierr = PetscFree(vertices);CHKERRQ(ierr); 651 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 652 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 653 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 654 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 655 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 656 } 657 if (n_constraints) { 658 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 659 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 660 ierr = MatDestroy(&M1);CHKERRQ(ierr); 661 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 662 } 663 ierr = PetscFree(auxindices);CHKERRQ(ierr); 664 /* get back data */ 665 *coarse_submat_vals_n = coarse_submat_vals; 666 PetscFunctionReturn(0); 667 } 668 669 #undef __FUNCT__ 670 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 671 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 672 { 673 PC_IS* pcis = (PC_IS*)(pc->data); 674 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 675 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 676 /* manage repeated solves */ 677 MatReuse reuse; 678 MatStructure matstruct; 679 PetscErrorCode ierr; 680 681 PetscFunctionBegin; 682 /* get mat flags */ 683 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 684 reuse = MAT_INITIAL_MATRIX; 685 if (pc->setupcalled) { 686 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 687 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 688 if (matstruct == SAME_NONZERO_PATTERN) { 689 reuse = MAT_REUSE_MATRIX; 690 } else { 691 reuse = MAT_INITIAL_MATRIX; 692 } 693 } 694 if (reuse == MAT_INITIAL_MATRIX) { 695 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 696 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 700 } 701 702 /* transform local matrices if needed */ 703 if (pcbddc->use_change_of_basis) { 704 Mat change_mat_all; 705 PetscScalar *row_cmat_values; 706 PetscInt *row_cmat_indices; 707 PetscInt *nnz,*is_indices,*temp_indices; 708 PetscInt i,j,k,n_D,n_B; 709 710 /* Get Non-overlapping dimensions */ 711 n_B = pcis->n_B; 712 n_D = pcis->n-n_B; 713 714 /* compute nonzero structure of change of basis on all local nodes */ 715 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 716 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 717 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 718 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 719 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 720 k=1; 721 for (i=0;i<n_B;i++) { 722 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 723 nnz[is_indices[i]]=j; 724 if (k < j) k = j; 725 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 726 } 727 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 728 /* assemble change of basis matrix on the whole set of local dofs */ 729 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 730 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 731 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 732 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 733 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 734 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 735 for (i=0;i<n_D;i++) { 736 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 737 } 738 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 739 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 740 for (i=0;i<n_B;i++) { 741 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 742 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 743 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 744 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 745 } 746 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 747 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 748 /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */ 749 ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr); 750 if (i==1) { 751 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 752 } else { 753 Mat work_mat; 754 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 755 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 756 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 757 } 758 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 759 ierr = PetscFree(nnz);CHKERRQ(ierr); 760 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 761 } else { 762 /* without change of basis, the local matrix is unchanged */ 763 if (!pcbddc->local_mat) { 764 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 765 pcbddc->local_mat = matis->A; 766 } 767 } 768 769 /* get submatrices */ 770 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 771 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 772 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 773 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 774 PetscFunctionReturn(0); 775 } 776 777 #undef __FUNCT__ 778 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 779 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 780 { 781 PC_IS* pcis = (PC_IS*)(pc->data); 782 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 783 IS is_aux1,is_aux2; 784 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 785 PetscInt n_vertices,n_constraints,i,j,n_R,n_D,n_B; 786 PetscBT bitmask; 787 PetscErrorCode ierr; 788 789 PetscFunctionBegin; 790 /* Set Non-overlapping dimensions */ 791 n_B = pcis->n_B; n_D = pcis->n - n_B; 792 /* get vertex indices from constraint matrix */ 793 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 794 /* Set number of constraints */ 795 n_constraints = pcbddc->local_primal_size-n_vertices; 796 /* create auxiliary bitmask */ 797 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 798 for (i=0;i<n_vertices;i++) { 799 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 800 } 801 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 802 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 803 for (i=0, n_R=0; i<pcis->n; i++) { 804 if (!PetscBTLookup(bitmask,i)) { 805 idx_R_local[n_R] = i; 806 n_R++; 807 } 808 } 809 ierr = PetscFree(vertices);CHKERRQ(ierr); 810 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr); 811 812 /* print some info if requested */ 813 if (pcbddc->dbg_flag) { 814 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 815 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 816 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 817 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 818 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 819 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 820 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 821 } 822 823 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 824 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 825 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 826 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 827 for (i=0; i<n_D; i++) { 828 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 829 } 830 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 831 for (i=0, j=0; i<n_R; i++) { 832 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 833 aux_array1[j++] = i; 834 } 835 } 836 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 837 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 838 for (i=0, j=0; i<n_B; i++) { 839 if (!PetscBTLookup(bitmask,is_indices[i])) { 840 aux_array2[j++] = i; 841 } 842 } 843 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 844 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 845 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 846 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 847 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 848 849 if (pcbddc->switch_static || pcbddc->dbg_flag) { 850 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 851 for (i=0, j=0; i<n_R; i++) { 852 if (PetscBTLookup(bitmask,idx_R_local[i])) { 853 aux_array1[j++] = i; 854 } 855 } 856 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 857 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 858 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 859 } 860 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 861 PetscFunctionReturn(0); 862 } 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 866 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 867 { 868 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 869 870 PetscFunctionBegin; 871 pcbddc->use_exact_dirichlet=use; 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 877 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 878 { 879 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 880 PC_IS *pcis = (PC_IS*)pc->data; 881 PC pc_temp; 882 Mat A_RR; 883 Vec vec1,vec2,vec3; 884 MatStructure matstruct; 885 PetscScalar m_one = -1.0; 886 PetscReal value; 887 PetscInt n_D,n_R,use_exact,use_exact_reduced; 888 PetscErrorCode ierr; 889 890 PetscFunctionBegin; 891 /* Creating PC contexts for local Dirichlet and Neumann problems */ 892 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 893 894 /* DIRICHLET PROBLEM */ 895 /* Matrix for Dirichlet problem is pcis->A_II */ 896 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 897 if (!pcbddc->ksp_D) { /* create object if not yet build */ 898 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 899 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 900 /* default */ 901 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 902 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 903 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 904 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 905 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 906 } 907 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 908 /* Allow user's customization */ 909 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 910 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 911 if (!n_D) { 912 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 913 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 914 } 915 /* Set Up KSP for Dirichlet problem of BDDC */ 916 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 917 /* set ksp_D into pcis data */ 918 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 919 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 920 pcis->ksp_D = pcbddc->ksp_D; 921 922 /* NEUMANN PROBLEM */ 923 /* Matrix for Neumann problem is A_RR -> we need to create it */ 924 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 925 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 926 if (!pcbddc->ksp_R) { /* create object if not yet build */ 927 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 928 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 929 /* default */ 930 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 931 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 932 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 933 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 934 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 935 } 936 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 937 /* Allow user's customization */ 938 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 939 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 940 if (!n_R) { 941 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 942 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 943 } 944 /* Set Up KSP for Neumann problem of BDDC */ 945 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 946 947 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 948 949 /* Dirichlet */ 950 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 951 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 952 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 953 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 954 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 955 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 956 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 957 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 958 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 959 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 960 /* need to be adapted? */ 961 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 962 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 963 ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr); 964 /* print info */ 965 if (pcbddc->dbg_flag) { 966 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 967 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 968 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 969 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 970 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 971 } 972 if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 973 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 974 } 975 976 /* Neumann */ 977 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 978 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 979 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 980 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 981 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 982 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 983 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 984 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 985 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 986 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 987 /* need to be adapted? */ 988 use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1); 989 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 990 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 991 /* print info */ 992 if (pcbddc->dbg_flag) { 993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 994 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 995 } 996 if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 997 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 998 } 999 1000 /* free Neumann problem's matrix */ 1001 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1007 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1008 { 1009 PetscErrorCode ierr; 1010 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1011 1012 PetscFunctionBegin; 1013 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1014 if (pcbddc->local_auxmat1) { 1015 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1016 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1017 } 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1023 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1024 { 1025 PetscErrorCode ierr; 1026 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1027 PC_IS* pcis = (PC_IS*) (pc->data); 1028 const PetscScalar zero = 0.0; 1029 1030 PetscFunctionBegin; 1031 /* Application of PHI^T (or PSI^T) */ 1032 if (pcbddc->coarse_psi_B) { 1033 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1034 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1035 } else { 1036 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1037 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1038 } 1039 /* Scatter data of coarse_rhs */ 1040 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1041 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 1043 /* Local solution on R nodes */ 1044 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1045 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1046 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1047 if (pcbddc->switch_static) { 1048 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1049 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1050 } 1051 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1052 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1053 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1055 if (pcbddc->switch_static) { 1056 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1057 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1058 } 1059 1060 /* Coarse solution */ 1061 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1062 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1063 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1064 } 1065 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1066 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1067 1068 /* Sum contributions from two levels */ 1069 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1070 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1076 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1077 { 1078 PetscErrorCode ierr; 1079 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1080 1081 PetscFunctionBegin; 1082 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1088 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1089 { 1090 PetscErrorCode ierr; 1091 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1092 1093 PetscFunctionBegin; 1094 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 /* uncomment for testing purposes */ 1099 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1102 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1103 { 1104 PetscErrorCode ierr; 1105 PC_IS* pcis = (PC_IS*)(pc->data); 1106 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1107 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1108 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1109 MatType impMatType=MATSEQAIJ; 1110 /* one and zero */ 1111 PetscScalar one=1.0,zero=0.0; 1112 /* space to store constraints and their local indices */ 1113 PetscScalar *temp_quadrature_constraint; 1114 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1115 /* iterators */ 1116 PetscInt i,j,k,total_counts,temp_start_ptr; 1117 /* stuff to store connected components stored in pcbddc->mat_graph */ 1118 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1119 PetscInt n_ISForFaces,n_ISForEdges; 1120 /* near null space stuff */ 1121 MatNullSpace nearnullsp; 1122 const Vec *nearnullvecs; 1123 Vec *localnearnullsp; 1124 PetscBool nnsp_has_cnst; 1125 PetscInt nnsp_size; 1126 PetscScalar *array; 1127 /* BLAS integers */ 1128 PetscBLASInt lwork,lierr; 1129 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1130 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1131 /* LAPACK working arrays for SVD or POD */ 1132 PetscBool skip_lapack; 1133 PetscScalar *work; 1134 PetscReal *singular_vals; 1135 #if defined(PETSC_USE_COMPLEX) 1136 PetscReal *rwork; 1137 #endif 1138 #if defined(PETSC_MISSING_LAPACK_GESVD) 1139 PetscBLASInt Blas_one_2=1; 1140 PetscScalar *temp_basis,*correlation_mat; 1141 #else 1142 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1143 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1144 #endif 1145 /* change of basis */ 1146 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1147 PetscBool boolforchange; 1148 PetscBT touched,change_basis; 1149 /* auxiliary stuff */ 1150 PetscInt *nnz,*is_indices,*local_to_B; 1151 /* some quantities */ 1152 PetscInt n_vertices,total_primal_vertices; 1153 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1154 1155 1156 PetscFunctionBegin; 1157 /* Get index sets for faces, edges and vertices from graph */ 1158 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1159 pcbddc->use_vertices = PETSC_TRUE; 1160 } 1161 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1162 /* print some info */ 1163 if (pcbddc->dbg_flag) { 1164 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1165 i = 0; 1166 if (ISForVertices) { 1167 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1168 } 1169 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1170 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1171 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1172 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1173 } 1174 /* check if near null space is attached to global mat */ 1175 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1176 if (nearnullsp) { 1177 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1178 } else { /* if near null space is not provided BDDC uses constants by default */ 1179 nnsp_size = 0; 1180 nnsp_has_cnst = PETSC_TRUE; 1181 } 1182 /* get max number of constraints on a single cc */ 1183 max_constraints = nnsp_size; 1184 if (nnsp_has_cnst) max_constraints++; 1185 1186 /* 1187 Evaluate maximum storage size needed by the procedure 1188 - temp_indices will contain start index of each constraint stored as follows 1189 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1190 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1191 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1192 */ 1193 total_counts = n_ISForFaces+n_ISForEdges; 1194 total_counts *= max_constraints; 1195 n_vertices = 0; 1196 if (ISForVertices) { 1197 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1198 } 1199 total_counts += n_vertices; 1200 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1201 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1202 total_counts = 0; 1203 max_size_of_constraint = 0; 1204 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1205 if (i<n_ISForEdges) { 1206 used_IS = &ISForEdges[i]; 1207 } else { 1208 used_IS = &ISForFaces[i-n_ISForEdges]; 1209 } 1210 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1211 total_counts += j; 1212 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1213 } 1214 total_counts *= max_constraints; 1215 total_counts += n_vertices; 1216 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1217 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1218 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1219 /* local to boundary numbering */ 1220 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1221 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1222 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1223 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1224 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1225 /* get local part of global near null space vectors */ 1226 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1227 for (k=0;k<nnsp_size;k++) { 1228 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1229 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1231 } 1232 1233 /* whether or not to skip lapack calls */ 1234 skip_lapack = PETSC_TRUE; 1235 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1236 1237 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1238 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1239 PetscScalar temp_work; 1240 #if defined(PETSC_MISSING_LAPACK_GESVD) 1241 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1242 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1243 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1244 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1245 #if defined(PETSC_USE_COMPLEX) 1246 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1247 #endif 1248 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1249 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1250 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1251 lwork = -1; 1252 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1253 #if !defined(PETSC_USE_COMPLEX) 1254 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1255 #else 1256 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1257 #endif 1258 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1259 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1260 #else /* on missing GESVD */ 1261 /* SVD */ 1262 PetscInt max_n,min_n; 1263 max_n = max_size_of_constraint; 1264 min_n = max_constraints; 1265 if (max_size_of_constraint < max_constraints) { 1266 min_n = max_size_of_constraint; 1267 max_n = max_constraints; 1268 } 1269 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1270 #if defined(PETSC_USE_COMPLEX) 1271 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1272 #endif 1273 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1274 lwork = -1; 1275 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1276 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1277 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1278 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1279 #if !defined(PETSC_USE_COMPLEX) 1280 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1281 #else 1282 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1283 #endif 1284 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1285 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1286 #endif /* on missing GESVD */ 1287 /* Allocate optimal workspace */ 1288 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1289 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1290 } 1291 /* Now we can loop on constraining sets */ 1292 total_counts = 0; 1293 temp_indices[0] = 0; 1294 /* vertices */ 1295 if (ISForVertices) { 1296 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1297 if (nnsp_has_cnst) { /* consider all vertices */ 1298 for (i=0;i<n_vertices;i++) { 1299 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1300 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1301 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1302 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1303 total_counts++; 1304 } 1305 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1306 PetscBool used_vertex; 1307 for (i=0;i<n_vertices;i++) { 1308 used_vertex = PETSC_FALSE; 1309 k = 0; 1310 while (!used_vertex && k<nnsp_size) { 1311 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1312 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1313 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1314 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1315 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1316 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1317 total_counts++; 1318 used_vertex = PETSC_TRUE; 1319 } 1320 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1321 k++; 1322 } 1323 } 1324 } 1325 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1326 n_vertices = total_counts; 1327 } 1328 1329 /* edges and faces */ 1330 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1331 if (i<n_ISForEdges) { 1332 used_IS = &ISForEdges[i]; 1333 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1334 } else { 1335 used_IS = &ISForFaces[i-n_ISForEdges]; 1336 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1337 } 1338 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1339 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1340 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1341 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1342 /* change of basis should not be performed on local periodic nodes */ 1343 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1344 if (nnsp_has_cnst) { 1345 PetscScalar quad_value; 1346 temp_constraints++; 1347 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1348 for (j=0;j<size_of_constraint;j++) { 1349 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1350 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1351 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1352 } 1353 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1354 if (boolforchange) { 1355 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1356 } 1357 total_counts++; 1358 } 1359 for (k=0;k<nnsp_size;k++) { 1360 PetscReal real_value; 1361 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1362 for (j=0;j<size_of_constraint;j++) { 1363 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1364 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1365 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1366 } 1367 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1368 /* check if array is null on the connected component */ 1369 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1370 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1371 if (real_value > 0.0) { /* keep indices and values */ 1372 temp_constraints++; 1373 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1374 if (boolforchange) { 1375 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1376 } 1377 total_counts++; 1378 } 1379 } 1380 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1381 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */ 1382 if (!pcbddc->use_nnsp_true) { 1383 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1384 1385 #if defined(PETSC_MISSING_LAPACK_GESVD) 1386 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1387 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1388 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1389 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1390 from that computed using LAPACKgesvd 1391 -> This is due to a different computation of eigenvectors in LAPACKheev 1392 -> The quality of the POD-computed basis will be the same */ 1393 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1394 /* Store upper triangular part of correlation matrix */ 1395 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1396 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1397 for (j=0;j<temp_constraints;j++) { 1398 for (k=0;k<j+1;k++) { 1399 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1400 } 1401 } 1402 /* compute eigenvalues and eigenvectors of correlation matrix */ 1403 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1404 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1405 #if !defined(PETSC_USE_COMPLEX) 1406 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1407 #else 1408 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1409 #endif 1410 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1411 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1412 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1413 j=0; 1414 while (j < temp_constraints && singular_vals[j] < tol) j++; 1415 total_counts=total_counts-j; 1416 /* scale and copy POD basis into used quadrature memory */ 1417 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1418 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1419 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1420 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1421 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1422 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1423 if (j<temp_constraints) { 1424 PetscInt ii; 1425 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1426 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1427 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1428 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1429 for (k=0;k<temp_constraints-j;k++) { 1430 for (ii=0;ii<size_of_constraint;ii++) { 1431 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1432 } 1433 } 1434 } 1435 #else /* on missing GESVD */ 1436 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1437 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1438 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1439 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1440 #if !defined(PETSC_USE_COMPLEX) 1441 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1442 #else 1443 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1444 #endif 1445 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1446 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1447 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1448 k = temp_constraints; 1449 if (k > size_of_constraint) k = size_of_constraint; 1450 j = 0; 1451 while (j < k && singular_vals[k-j-1] < tol) j++; 1452 total_counts = total_counts-temp_constraints+k-j; 1453 #endif /* on missing GESVD */ 1454 } 1455 } 1456 /* free index sets of faces, edges and vertices */ 1457 for (i=0;i<n_ISForFaces;i++) { 1458 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1459 } 1460 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1461 for (i=0;i<n_ISForEdges;i++) { 1462 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1463 } 1464 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1465 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1466 1467 /* free workspace */ 1468 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1469 ierr = PetscFree(work);CHKERRQ(ierr); 1470 #if defined(PETSC_USE_COMPLEX) 1471 ierr = PetscFree(rwork);CHKERRQ(ierr); 1472 #endif 1473 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1474 #if defined(PETSC_MISSING_LAPACK_GESVD) 1475 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1476 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1477 #endif 1478 } 1479 for (k=0;k<nnsp_size;k++) { 1480 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1481 } 1482 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1483 1484 /* set quantities in pcbddc data structure */ 1485 /* n_vertices defines the number of subdomain corners in the primal space */ 1486 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1487 pcbddc->local_primal_size = total_counts; 1488 pcbddc->n_vertices = n_vertices; 1489 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1490 1491 /* Create constraint matrix */ 1492 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1493 /* so we need to set it up properly either with or without change of basis */ 1494 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1495 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1496 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1497 /* array to compute a local numbering of constraints : vertices first then constraints */ 1498 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1499 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1500 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1501 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1502 /* auxiliary stuff for basis change */ 1503 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1504 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1505 1506 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1507 total_primal_vertices=0; 1508 for (i=0;i<pcbddc->local_primal_size;i++) { 1509 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1510 if (size_of_constraint == 1) { 1511 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1512 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1513 aux_primal_minloc[total_primal_vertices]=0; 1514 total_primal_vertices++; 1515 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1516 PetscInt min_loc,min_index; 1517 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1518 /* find first untouched local node */ 1519 k = 0; 1520 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1521 min_index = global_indices[k]; 1522 min_loc = k; 1523 /* search the minimum among global nodes already untouched on the cc */ 1524 for (k=1;k<size_of_constraint;k++) { 1525 /* there can be more than one constraint on a single connected component */ 1526 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1527 min_index = global_indices[k]; 1528 min_loc = k; 1529 } 1530 } 1531 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1532 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1533 aux_primal_minloc[total_primal_vertices]=min_loc; 1534 total_primal_vertices++; 1535 } 1536 } 1537 /* free workspace */ 1538 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1539 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1540 /* permute indices in order to have a sorted set of vertices */ 1541 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering); 1542 1543 /* nonzero structure of constraint matrix */ 1544 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1545 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1546 j=total_primal_vertices; 1547 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1548 if (!PetscBTLookup(change_basis,i)) { 1549 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1550 j++; 1551 } 1552 } 1553 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1554 ierr = PetscFree(nnz);CHKERRQ(ierr); 1555 /* set values in constraint matrix */ 1556 for (i=0;i<total_primal_vertices;i++) { 1557 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1558 } 1559 total_counts = total_primal_vertices; 1560 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1561 if (!PetscBTLookup(change_basis,i)) { 1562 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1563 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1564 total_counts++; 1565 } 1566 } 1567 /* assembling */ 1568 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1569 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1570 /* 1571 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1572 */ 1573 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1574 if (pcbddc->use_change_of_basis) { 1575 PetscBool qr_needed = PETSC_FALSE; 1576 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1577 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1578 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1579 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1580 /* work arrays */ 1581 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1582 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1583 /* nonzeros per row */ 1584 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1585 if (PetscBTLookup(change_basis,i)) { 1586 qr_needed = PETSC_TRUE; 1587 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1588 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1589 } 1590 } 1591 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1592 ierr = PetscFree(nnz);CHKERRQ(ierr); 1593 /* Set initial identity in the matrix */ 1594 for (i=0;i<pcis->n_B;i++) { 1595 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1596 } 1597 1598 /* Now we loop on the constraints which need a change of basis */ 1599 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1600 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */ 1601 /* Change of basis matrix T computed via QR decomposition of constraints */ 1602 if (qr_needed) { 1603 /* dual and primal dofs on a single cc */ 1604 PetscInt dual_dofs,primal_dofs; 1605 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1606 PetscInt primal_counter; 1607 /* working stuff for GEQRF */ 1608 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1609 PetscBLASInt lqr_work; 1610 /* working stuff for UNGQR */ 1611 PetscScalar *gqr_work,lgqr_work_t; 1612 PetscBLASInt lgqr_work; 1613 /* working stuff for TRTRS */ 1614 PetscScalar *trs_rhs; 1615 PetscBLASInt Blas_NRHS; 1616 /* pointers for values insertion into change of basis matrix */ 1617 PetscInt *start_rows,*start_cols; 1618 PetscScalar *start_vals; 1619 /* working stuff for values insertion */ 1620 PetscBT is_primal; 1621 1622 /* space to store Q */ 1623 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1624 /* first we issue queries for optimal work */ 1625 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1626 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1627 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1628 lqr_work = -1; 1629 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1630 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1631 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1632 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1633 lgqr_work = -1; 1634 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1635 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1636 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1637 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1638 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1639 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1640 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1641 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1642 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1643 /* array to store scaling factors for reflectors */ 1644 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1645 /* array to store rhs and solution of triangular solver */ 1646 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1647 /* array to store whether a node is primal or not */ 1648 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1649 for (i=0;i<total_primal_vertices;i++) { 1650 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1651 } 1652 1653 /* allocating workspace for check */ 1654 if (pcbddc->dbg_flag) { 1655 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1656 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1657 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1658 } 1659 1660 /* loop on constraints and see whether or not they need a change of basis */ 1661 /* -> using implicit ordering contained in temp_indices data */ 1662 total_counts = pcbddc->n_vertices; 1663 primal_counter = total_counts; 1664 while (total_counts<pcbddc->local_primal_size) { 1665 primal_dofs = 1; 1666 if (PetscBTLookup(change_basis,total_counts)) { 1667 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1668 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1669 primal_dofs++; 1670 } 1671 /* get constraint info */ 1672 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1673 dual_dofs = size_of_constraint-primal_dofs; 1674 1675 /* copy quadrature constraints for change of basis check */ 1676 if (pcbddc->dbg_flag) { 1677 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr); 1678 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1679 } 1680 1681 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1682 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1683 1684 /* compute QR decomposition of constraints */ 1685 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1686 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1688 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1689 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1690 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1691 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1692 1693 /* explictly compute R^-T */ 1694 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1695 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1696 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1697 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1698 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1699 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1700 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1701 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1702 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1703 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1704 1705 /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1706 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1707 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1708 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1709 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1710 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1711 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1712 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1713 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1714 1715 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1716 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1717 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1718 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1719 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1720 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1721 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1722 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1723 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1724 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1725 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1726 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1727 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1728 1729 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1730 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1731 /* insert cols for primal dofs */ 1732 for (j=0;j<primal_dofs;j++) { 1733 start_vals = &qr_basis[j*size_of_constraint]; 1734 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1735 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1736 } 1737 /* insert cols for dual dofs */ 1738 for (j=0,k=0;j<dual_dofs;k++) { 1739 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1740 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1741 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1742 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1743 j++; 1744 } 1745 } 1746 1747 /* check change of basis */ 1748 if (pcbddc->dbg_flag) { 1749 PetscInt ii,jj; 1750 PetscBool valid_qr=PETSC_TRUE; 1751 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1752 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1753 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1754 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1755 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1756 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1757 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1758 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1759 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1760 for (jj=0;jj<size_of_constraint;jj++) { 1761 for (ii=0;ii<primal_dofs;ii++) { 1762 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1763 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1764 } 1765 } 1766 if (!valid_qr) { 1767 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1768 for (jj=0;jj<size_of_constraint;jj++) { 1769 for (ii=0;ii<primal_dofs;ii++) { 1770 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1771 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1772 } 1773 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1774 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1775 } 1776 } 1777 } 1778 } else { 1779 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1780 } 1781 } 1782 /* increment primal counter */ 1783 primal_counter += primal_dofs; 1784 } else { 1785 if (pcbddc->dbg_flag) { 1786 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1787 } 1788 } 1789 /* increment constraint counter total_counts */ 1790 total_counts += primal_dofs; 1791 } 1792 if (pcbddc->dbg_flag) { 1793 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1794 ierr = PetscFree(work);CHKERRQ(ierr); 1795 } 1796 /* free workspace */ 1797 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1798 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1799 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1800 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1801 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1802 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1803 } 1804 /* assembling */ 1805 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1806 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1807 /* 1808 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1809 */ 1810 } 1811 /* free workspace */ 1812 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1813 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1814 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1815 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1816 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1817 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1818 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1819 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1825 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1826 { 1827 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1828 PC_IS *pcis = (PC_IS*)pc->data; 1829 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1830 PetscInt bs,ierr,i,vertex_size; 1831 PetscViewer viewer=pcbddc->dbg_viewer; 1832 1833 PetscFunctionBegin; 1834 /* Init local Graph struct */ 1835 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 1836 1837 /* Check validity of the csr graph passed in by the user */ 1838 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 1839 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1840 } 1841 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 1842 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 1843 Mat mat_adj; 1844 const PetscInt *xadj,*adjncy; 1845 PetscBool flg_row=PETSC_TRUE; 1846 1847 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 1848 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1849 if (!flg_row) { 1850 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 1851 } 1852 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 1853 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1854 if (!flg_row) { 1855 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 1856 } 1857 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 1858 } 1859 1860 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 1861 vertex_size = 1; 1862 if (!pcbddc->n_ISForDofs) { 1863 IS *custom_ISForDofs; 1864 1865 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1866 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 1867 for (i=0;i<bs;i++) { 1868 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 1869 } 1870 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 1871 /* remove my references to IS objects */ 1872 for (i=0;i<bs;i++) { 1873 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 1874 } 1875 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 1876 } else { /* mat block size as vertex size (used for elasticity) */ 1877 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 1878 } 1879 1880 /* Setup of Graph */ 1881 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 1882 1883 /* Graph's connected components analysis */ 1884 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 1885 1886 /* print some info to stdout */ 1887 if (pcbddc->dbg_flag) { 1888 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 1895 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 1896 { 1897 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1898 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 1899 PetscErrorCode ierr; 1900 1901 PetscFunctionBegin; 1902 n = 0; 1903 vertices = 0; 1904 if (pcbddc->ConstraintMatrix) { 1905 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 1906 for (i=0;i<local_primal_size;i++) { 1907 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1908 if (size_of_constraint == 1) n++; 1909 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1910 } 1911 if (vertices_idx) { 1912 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1913 n = 0; 1914 for (i=0;i<local_primal_size;i++) { 1915 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1916 if (size_of_constraint == 1) { 1917 vertices[n++]=row_cmat_indices[0]; 1918 } 1919 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1920 } 1921 } 1922 } 1923 *n_vertices = n; 1924 if (vertices_idx) *vertices_idx = vertices; 1925 PetscFunctionReturn(0); 1926 } 1927 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 1930 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 1931 { 1932 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1933 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 1934 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 1935 PetscBT touched; 1936 PetscErrorCode ierr; 1937 1938 /* This function assumes that the number of local constraints per connected component 1939 is not greater than the number of nodes defined for the connected component 1940 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1941 PetscFunctionBegin; 1942 n = 0; 1943 constraints_index = 0; 1944 if (pcbddc->ConstraintMatrix) { 1945 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 1946 max_size_of_constraint = 0; 1947 for (i=0;i<local_primal_size;i++) { 1948 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1949 if (size_of_constraint > 1) { 1950 n++; 1951 } 1952 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 1953 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 1954 } 1955 if (constraints_idx) { 1956 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 1957 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 1958 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 1959 n = 0; 1960 for (i=0;i<local_primal_size;i++) { 1961 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1962 if (size_of_constraint > 1) { 1963 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 1964 /* find first untouched local node */ 1965 j = 0; 1966 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 1967 min_index = row_cmat_global_indices[j]; 1968 min_loc = j; 1969 /* search the minimum among nodes not yet touched on the connected component 1970 since there can be more than one constraint on a single cc */ 1971 for (j=1;j<size_of_constraint;j++) { 1972 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 1973 min_index = row_cmat_global_indices[j]; 1974 min_loc = j; 1975 } 1976 } 1977 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 1978 constraints_index[n++] = row_cmat_indices[min_loc]; 1979 } 1980 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 1981 } 1982 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1983 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 1984 } 1985 } 1986 *n_constraints = n; 1987 if (constraints_idx) *constraints_idx = constraints_index; 1988 PetscFunctionReturn(0); 1989 } 1990 1991 /* the next two functions has been adapted from pcis.c */ 1992 #undef __FUNCT__ 1993 #define __FUNCT__ "PCBDDCApplySchur" 1994 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 1995 { 1996 PetscErrorCode ierr; 1997 PC_IS *pcis = (PC_IS*)(pc->data); 1998 1999 PetscFunctionBegin; 2000 if (!vec2_B) { vec2_B = v; } 2001 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2002 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2003 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2004 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2005 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2011 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2012 { 2013 PetscErrorCode ierr; 2014 PC_IS *pcis = (PC_IS*)(pc->data); 2015 2016 PetscFunctionBegin; 2017 if (!vec2_B) { vec2_B = v; } 2018 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2019 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2020 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2021 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2022 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2023 PetscFunctionReturn(0); 2024 } 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "PCBDDCSubsetNumbering" 2028 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2029 { 2030 Vec local_vec,global_vec; 2031 IS seqis,paris; 2032 VecScatter scatter_ctx; 2033 PetscScalar *array; 2034 PetscInt *temp_global_dofs; 2035 PetscScalar globalsum; 2036 PetscInt i,j,s; 2037 PetscInt nlocals,first_index,old_index,max_local; 2038 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2039 PetscMPIInt *dof_sizes,*dof_displs; 2040 PetscBool first_found; 2041 PetscErrorCode ierr; 2042 2043 PetscFunctionBegin; 2044 /* mpi buffers */ 2045 MPI_Comm_size(comm,&size_prec_comm); 2046 MPI_Comm_rank(comm,&rank_prec_comm); 2047 j = ( !rank_prec_comm ? size_prec_comm : 0); 2048 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2049 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2050 /* get maximum size of subset */ 2051 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2052 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2053 max_local = 0; 2054 if (n_local_dofs) { 2055 max_local = temp_global_dofs[0]; 2056 for (i=1;i<n_local_dofs;i++) { 2057 if (max_local < temp_global_dofs[i] ) { 2058 max_local = temp_global_dofs[i]; 2059 } 2060 } 2061 } 2062 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2063 max_global++; 2064 max_local = 0; 2065 if (n_local_dofs) { 2066 max_local = local_dofs[0]; 2067 for (i=1;i<n_local_dofs;i++) { 2068 if (max_local < local_dofs[i] ) { 2069 max_local = local_dofs[i]; 2070 } 2071 } 2072 } 2073 max_local++; 2074 /* allocate workspace */ 2075 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2076 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2077 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2078 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2079 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2080 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2081 /* create scatter */ 2082 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2083 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2084 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2085 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2086 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2087 /* init array */ 2088 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2089 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2090 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2091 if (local_dofs_mult) { 2092 for (i=0;i<n_local_dofs;i++) { 2093 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2094 } 2095 } else { 2096 for (i=0;i<n_local_dofs;i++) { 2097 array[local_dofs[i]]=1.0; 2098 } 2099 } 2100 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2101 /* scatter into global vec and get total number of global dofs */ 2102 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2103 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2104 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2105 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2106 /* Fill global_vec with cumulative function for global numbering */ 2107 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2108 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2109 nlocals = 0; 2110 first_index = -1; 2111 first_found = PETSC_FALSE; 2112 for (i=0;i<s;i++) { 2113 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2114 first_found = PETSC_TRUE; 2115 first_index = i; 2116 } 2117 nlocals += (PetscInt)PetscRealPart(array[i]); 2118 } 2119 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2120 if (!rank_prec_comm) { 2121 dof_displs[0]=0; 2122 for (i=1;i<size_prec_comm;i++) { 2123 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2124 } 2125 } 2126 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2127 if (first_found) { 2128 array[first_index] += (PetscScalar)nlocals; 2129 old_index = first_index; 2130 for (i=first_index+1;i<s;i++) { 2131 if (PetscRealPart(array[i]) > 0.0) { 2132 array[i] += array[old_index]; 2133 old_index = i; 2134 } 2135 } 2136 } 2137 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2138 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2139 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2140 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2141 /* get global ordering of local dofs */ 2142 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2143 if (local_dofs_mult) { 2144 for (i=0;i<n_local_dofs;i++) { 2145 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2146 } 2147 } else { 2148 for (i=0;i<n_local_dofs;i++) { 2149 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2150 } 2151 } 2152 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2153 /* free workspace */ 2154 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2155 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2156 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2157 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2158 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2159 /* return pointer to global ordering of local dofs */ 2160 *global_numbering_subset = temp_global_dofs; 2161 PetscFunctionReturn(0); 2162 } 2163 2164 #undef __FUNCT__ 2165 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2166 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2167 { 2168 PetscInt i,j; 2169 PetscScalar *alphas; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 /* this implements stabilized Gram-Schmidt */ 2174 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2175 for (i=0;i<n;i++) { 2176 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2177 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2178 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2179 } 2180 ierr = PetscFree(alphas);CHKERRQ(ierr); 2181 PetscFunctionReturn(0); 2182 } 2183 2184 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatConvert_IS_AIJ" 2187 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2188 { 2189 Mat new_mat; 2190 Mat_IS *matis = (Mat_IS*)(mat->data); 2191 /* info on mat */ 2192 PetscInt bs,rows,cols; 2193 PetscInt lrows,lcols; 2194 PetscInt local_rows,local_cols; 2195 PetscMPIInt nsubdomains; 2196 /* preallocation */ 2197 Vec vec_dnz,vec_onz; 2198 PetscScalar *my_dnz,*my_onz,*array; 2199 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2200 PetscInt index_row,index_col,owner; 2201 PetscInt *local_indices,*global_indices; 2202 /* work */ 2203 PetscInt i,j; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 /* CHECKS */ 2208 /* get info from mat */ 2209 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2210 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2211 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2212 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2213 2214 /* MAT_INITIAL_MATRIX */ 2215 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2216 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2217 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2218 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2219 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2220 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2221 2222 /* 2223 preallocation 2224 */ 2225 2226 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2227 /* 2228 Some vectors are needed to sum up properly on shared interface dofs. 2229 Note that preallocation is not exact, since it overestimates nonzeros 2230 */ 2231 /* 2232 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2233 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2234 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2235 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2236 */ 2237 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2238 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2239 /* All processes need to compute entire row ownership */ 2240 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2241 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2242 for (i=0;i<nsubdomains;i++) { 2243 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2244 row_ownership[j]=i; 2245 } 2246 } 2247 /* map indices of local mat to global values */ 2248 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2249 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2250 for (i=0;i<local_rows;i++) local_indices[i]=i; 2251 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2252 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2253 2254 /* 2255 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2256 then, they will be summed up properly. This way, preallocation is always sufficient 2257 */ 2258 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2259 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2260 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2261 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2262 for (i=0;i<local_rows;i++) { 2263 index_row = global_indices[i]; 2264 for (j=i;j<local_rows;j++) { 2265 owner = row_ownership[index_row]; 2266 index_col = global_indices[j]; 2267 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2268 my_dnz[i] += 1.0; 2269 } else { /* offdiag block */ 2270 my_onz[i] += 1.0; 2271 } 2272 /* same as before, interchanging rows and cols */ 2273 if (i != j) { 2274 owner = row_ownership[index_col]; 2275 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2276 my_dnz[j] += 1.0; 2277 } else { 2278 my_onz[j] += 1.0; 2279 } 2280 } 2281 } 2282 } 2283 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2284 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2285 if (local_rows) { /* multilevel guard */ 2286 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2287 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2288 } 2289 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2290 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2291 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2292 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2293 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2294 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2295 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2296 2297 /* set computed preallocation in dnz and onz */ 2298 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2299 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2300 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2301 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2302 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2303 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2304 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2305 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2306 2307 /* Resize preallocation if overestimated */ 2308 for (i=0;i<lrows;i++) { 2309 dnz[i] = PetscMin(dnz[i],lcols); 2310 onz[i] = PetscMin(onz[i],cols-lcols); 2311 } 2312 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2313 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2314 2315 /* 2316 Set values. Very Basic. 2317 */ 2318 for (i=0;i<local_rows;i++) { 2319 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2320 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2321 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2322 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2323 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2324 } 2325 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2326 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2327 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2328 2329 /* get back mat */ 2330 *M = new_mat; 2331 PetscFunctionReturn(0); 2332 } 2333 2334 #undef __FUNCT__ 2335 #define __FUNCT__ "MatISSubassemble_Private" 2336 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2337 { 2338 Mat subdomain_adj; 2339 IS new_ranks,ranks_send_to; 2340 MatPartitioning partitioner; 2341 Mat_IS *matis; 2342 PetscInt n_neighs,*neighs,*n_shared,**shared; 2343 PetscInt prank; 2344 PetscMPIInt size,rank,color; 2345 PetscInt *xadj,*adjncy,*oldranks; 2346 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2347 PetscInt i,j,n_subdomains,local_size,threshold=0; 2348 PetscErrorCode ierr; 2349 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE,use_threshold=PETSC_FALSE; 2350 PetscSubcomm subcomm; 2351 2352 PetscFunctionBegin; 2353 ierr = PetscOptionsGetBool(NULL,"-matisc_use_square",&use_square,NULL);CHKERRQ(ierr); 2354 ierr = PetscOptionsGetBool(NULL,"-matisc_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2355 ierr = PetscOptionsGetInt(NULL,"-matisc_threshold",&threshold,&use_threshold);CHKERRQ(ierr); 2356 2357 /* Get info on mapping */ 2358 matis = (Mat_IS*)(mat->data); 2359 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2360 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2361 2362 /* build local CSR graph of subdomains' connectivity */ 2363 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2364 xadj[0] = 0; 2365 xadj[1] = PetscMax(n_neighs-1,0); 2366 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2367 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2368 2369 if (use_threshold) { 2370 PetscInt* count; 2371 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2372 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2373 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2374 for (j=0;j<n_shared[i];j++) { 2375 count[shared[i][j]] += 1; 2376 } 2377 } 2378 xadj[1] = 0; 2379 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2380 for (j=0;j<n_shared[i];j++) { 2381 if (count[shared[i][j]] < threshold) { 2382 adjncy[xadj[1]] = neighs[i]; 2383 adjncy_wgt[xadj[1]] = n_shared[i]; 2384 xadj[1]++; 2385 break; 2386 } 2387 } 2388 } 2389 ierr = PetscFree(count);CHKERRQ(ierr); 2390 } else { 2391 if (xadj[1]) { 2392 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2393 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2394 } 2395 } 2396 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2397 if (use_square) { 2398 for (i=0;i<xadj[1];i++) { 2399 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2400 } 2401 } 2402 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2403 2404 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2405 2406 /* 2407 Restrict work on active processes only. 2408 */ 2409 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2410 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2411 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2412 ierr = PetscMPIIntCast(!n_neighs,&color);CHKERRQ(ierr); 2413 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr); 2414 if (color) { 2415 ierr = PetscFree(xadj);CHKERRQ(ierr); 2416 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2417 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2418 } else { 2419 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2420 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2421 prank = rank; 2422 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2423 for (i=0;i<size;i++) { 2424 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2425 } 2426 for (i=0;i<xadj[1];i++) { 2427 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2428 } 2429 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2430 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2431 n_subdomains = (PetscInt)size; 2432 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2433 2434 /* Partition */ 2435 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2436 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2437 if (use_vwgt) { 2438 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2439 v_wgt[0] = local_size; 2440 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2441 } 2442 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2443 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2444 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2445 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2446 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2447 ierr = ISView(new_ranks,0);CHKERRQ(ierr); 2448 2449 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2450 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2451 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2452 /* clean up */ 2453 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2454 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2455 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2456 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2457 } 2458 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2459 2460 /* assemble parallel IS for sends */ 2461 i = 1; 2462 if (color) i=0; 2463 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2464 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2465 2466 /* get back IS */ 2467 *is_sends = ranks_send_to; 2468 PetscFunctionReturn(0); 2469 } 2470 2471 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatISSubassemble" 2475 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2476 { 2477 Mat local_mat,new_mat; 2478 Mat_IS *matis; 2479 IS is_sends_internal; 2480 PetscInt rows,cols; 2481 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2482 PetscBool ismatis,isdense; 2483 ISLocalToGlobalMapping l2gmap; 2484 PetscInt* l2gmap_indices; 2485 const PetscInt* is_indices; 2486 MatType new_local_type; 2487 MatTypePrivate new_local_type_private; 2488 /* buffers */ 2489 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2490 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2491 /* MPI */ 2492 MPI_Comm comm; 2493 PetscMPIInt n_sends,n_recvs,commsize; 2494 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2495 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2496 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2497 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2498 PetscErrorCode ierr; 2499 2500 PetscFunctionBegin; 2501 /* checks */ 2502 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2503 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2504 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2505 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2506 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2507 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2508 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2509 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2510 PetscValidLogicalCollectiveInt(mat,bs,0); 2511 /* prepare IS for sending if not provided */ 2512 if (!is_sends) { 2513 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2514 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2515 } else { 2516 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2517 is_sends_internal = is_sends; 2518 } 2519 2520 /* get pointer of MATIS data */ 2521 matis = (Mat_IS*)mat->data; 2522 2523 /* get comm */ 2524 comm = PetscObjectComm((PetscObject)mat); 2525 2526 /* compute number of sends */ 2527 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2528 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2529 2530 /* compute number of receives */ 2531 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2532 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2533 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2534 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2535 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2536 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2537 ierr = PetscFree(iflags);CHKERRQ(ierr); 2538 2539 /* prepare send/receive buffers */ 2540 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2541 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2542 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2543 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2544 2545 /* Get data from local mat */ 2546 if (!isdense) { 2547 /* TODO: See below some guidelines on how to prepare the local buffers */ 2548 /* 2549 send_buffer_vals should contain the raw values of the local matrix 2550 send_buffer_idxs should contain: 2551 - MatType_PRIVATE type 2552 - PetscInt size_of_l2gmap 2553 - PetscInt global_row_indices[size_of_l2gmap] 2554 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2555 */ 2556 } else { 2557 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2558 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2559 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2560 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2561 send_buffer_idxs[1] = i; 2562 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2563 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2564 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2565 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2566 for (i=0;i<n_sends;i++) { 2567 ilengths_vals[is_indices[i]] = len*len; 2568 ilengths_idxs[is_indices[i]] = len+2; 2569 } 2570 } 2571 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2572 buf_size_idxs = 0; 2573 buf_size_vals = 0; 2574 for (i=0;i<n_recvs;i++) { 2575 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2576 buf_size_vals += (PetscInt)olengths_vals[i]; 2577 } 2578 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2579 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2580 2581 /* get new tags for clean communications */ 2582 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2583 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2584 2585 /* allocate for requests */ 2586 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2587 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2588 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2589 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2590 2591 /* communications */ 2592 ptr_idxs = recv_buffer_idxs; 2593 ptr_vals = recv_buffer_vals; 2594 for (i=0;i<n_recvs;i++) { 2595 source_dest = onodes[i]; 2596 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2597 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2598 ptr_idxs += olengths_idxs[i]; 2599 ptr_vals += olengths_vals[i]; 2600 } 2601 for (i=0;i<n_sends;i++) { 2602 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2603 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2604 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2605 } 2606 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2607 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2608 2609 /* assemble new l2g map */ 2610 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2611 ptr_idxs = recv_buffer_idxs; 2612 buf_size_idxs = 0; 2613 for (i=0;i<n_recvs;i++) { 2614 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2615 ptr_idxs += olengths_idxs[i]; 2616 } 2617 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2618 ptr_idxs = recv_buffer_idxs; 2619 buf_size_idxs = 0; 2620 for (i=0;i<n_recvs;i++) { 2621 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2622 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2623 ptr_idxs += olengths_idxs[i]; 2624 } 2625 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2626 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2627 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2628 2629 /* infer new local matrix type from received local matrices type */ 2630 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2631 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2632 new_local_type_private = MATAIJ_PRIVATE; 2633 new_local_type = MATSEQAIJ; 2634 if (n_recvs) { 2635 new_local_type_private = send_buffer_idxs[0]; 2636 ptr_idxs = recv_buffer_idxs; 2637 for (i=0;i<n_recvs;i++) { 2638 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2639 new_local_type_private = MATAIJ_PRIVATE; 2640 break; 2641 } 2642 ptr_idxs += olengths_idxs[i]; 2643 } 2644 switch (new_local_type_private) { 2645 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2646 new_local_type = MATSEQAIJ; 2647 bs = 1; 2648 break; 2649 case MATAIJ_PRIVATE: 2650 new_local_type = MATSEQAIJ; 2651 bs = 1; 2652 break; 2653 case MATBAIJ_PRIVATE: 2654 new_local_type = MATSEQBAIJ; 2655 break; 2656 case MATSBAIJ_PRIVATE: 2657 new_local_type = MATSEQSBAIJ; 2658 break; 2659 default: 2660 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2661 break; 2662 } 2663 } 2664 2665 /* create MATIS object */ 2666 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2667 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2668 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2669 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2670 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2671 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2672 2673 /* set values */ 2674 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2675 ptr_vals = recv_buffer_vals; 2676 ptr_idxs = recv_buffer_idxs; 2677 for (i=0;i<n_recvs;i++) { 2678 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2679 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2680 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2681 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2682 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2683 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2684 } 2685 ptr_idxs += olengths_idxs[i]; 2686 ptr_vals += olengths_vals[i]; 2687 } 2688 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2689 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2690 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2691 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2692 2693 { /* check */ 2694 Vec lvec,rvec; 2695 PetscReal infty_error; 2696 2697 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2698 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2699 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2700 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2701 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2702 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2703 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2704 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2705 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2706 } 2707 2708 /* free workspace */ 2709 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2710 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2711 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2712 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2713 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2714 if (isdense) { 2715 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2716 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2717 } else { 2718 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2719 } 2720 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2721 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2722 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2723 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2724 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2725 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2726 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2727 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2728 ierr = PetscFree(onodes);CHKERRQ(ierr); 2729 /* get back new mat */ 2730 *mat_n = new_mat; 2731 PetscFunctionReturn(0); 2732 } 2733 2734 #undef __FUNCT__ 2735 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2736 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2737 { 2738 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2739 PC_IS *pcis = (PC_IS*)pc->data; 2740 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2741 MatNullSpace CoarseNullSpace=NULL; 2742 ISLocalToGlobalMapping coarse_islg; 2743 IS coarse_is; 2744 PetscInt coarse_size,im_active,active_procs,*local_primal_indices=NULL; 2745 PCType coarse_pc_type; 2746 KSPType coarse_ksp_type; 2747 PetscBool setsym,issym=PETSC_FALSE; 2748 PetscErrorCode ierr; 2749 2750 PetscFunctionBegin; 2751 /* TODO: remove this and coarse ennums */ 2752 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC || pcbddc->coarse_problem_type == REPLICATED_BDDC) { 2753 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2754 } 2755 2756 /* Assign global numbering to coarse dofs */ 2757 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2758 2759 /* count "active processes" */ 2760 im_active = 0; 2761 if (pcis->n) im_active = 1; 2762 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2763 2764 /* adapt coarse problem type if needed */ 2765 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2766 if (pcbddc->current_level < pcbddc->max_levels) { 2767 if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) { 2768 if (pcbddc->dbg_flag) { 2769 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active %d,ratio %d). Using direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2770 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2771 } 2772 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2773 } 2774 } else { 2775 if (pcbddc->dbg_flag) { 2776 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2777 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2778 } 2779 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2780 } 2781 } 2782 2783 /* creates MATIS object for coarse matrix */ 2784 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 2785 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 2786 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 2787 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 2788 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 2789 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2790 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2791 2792 /* Convert or subassemble MATIS depending on chosen coarse solver approach */ 2793 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 2794 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 2795 /* just for now */ 2796 coarse_pc_type = PCREDUNDANT; 2797 coarse_ksp_type = KSPPREONLY; 2798 } else { 2799 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 2800 /* just for now */ 2801 coarse_pc_type = PCBDDC; 2802 coarse_ksp_type = KSPRICHARDSON; 2803 } 2804 2805 /* propagate symmetry info to coarse matrix */ 2806 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2807 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 2808 2809 /* create local to global scatters coarse problem */ 2810 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2811 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2812 2813 /* free memory no longer needed */ 2814 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 2815 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 2816 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 2817 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 2818 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 2819 2820 /* Compute coarse null space */ 2821 if (pcbddc->NullSpace) { 2822 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 2823 } 2824 2825 /* KSP for coarse problem */ 2826 { 2827 PC pc_temp; 2828 PetscBool isbddc=PETSC_FALSE; 2829 PetscInt max_it; 2830 2831 if (!pcbddc->coarse_ksp) { /* create the coarse KSP once */ 2832 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2833 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2834 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2835 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2836 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2837 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2838 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2839 } 2840 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2841 /* Allow user's customization */ 2842 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 2843 /* Set Up PC for coarse problem BDDC */ 2844 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2845 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 2846 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 2847 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 2848 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2849 if (CoarseNullSpace) { 2850 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 2851 } 2852 if (pcbddc->dbg_flag) { 2853 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"----------------Level %d -> Setting up next level---------------\n",pcbddc->current_level);CHKERRQ(ierr); 2854 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2855 } 2856 } else { 2857 if (CoarseNullSpace) { 2858 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 2859 } 2860 } 2861 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2862 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2863 2864 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 2865 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2866 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 2867 if (max_it == 1) { 2868 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 2869 if (isbddc) { 2870 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 2871 } 2872 } 2873 } 2874 2875 /* Check coarse problem if requested */ 2876 if (pcbddc->dbg_flag) { 2877 KSP check_ksp; 2878 PC check_pc; 2879 Vec check_vec; 2880 PetscInt its; 2881 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 2882 KSPType check_ksp_type; 2883 2884 /* Create ksp object suitable for extreme eigenvalues' estimation */ 2885 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&check_ksp);CHKERRQ(ierr); 2886 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2887 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 2888 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2889 if (issym) check_ksp_type = KSPCG; 2890 else check_ksp_type = KSPGMRES; 2891 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 2892 } else { 2893 check_ksp_type = KSPPREONLY; 2894 } 2895 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 2896 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 2897 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 2898 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 2899 /* create random vec */ 2900 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 2901 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 2902 if (CoarseNullSpace) { 2903 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 2904 } 2905 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2906 /* solve coarse problem */ 2907 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2908 if (CoarseNullSpace) { 2909 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 2910 } 2911 /* check coarse problem residual error */ 2912 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 2913 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2914 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2915 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 2916 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 2917 /* get eigenvalue estimation if inexact */ 2918 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2919 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2920 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 2921 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",its,check_ksp_type);CHKERRQ(ierr); 2922 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2923 } 2924 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 2925 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 2926 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 2927 } 2928 if (pcbddc->dbg_flag) { 2929 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2930 } 2931 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 2932 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 2933 PetscFunctionReturn(0); 2934 } 2935 2936 #undef __FUNCT__ 2937 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 2938 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 2939 { 2940 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2941 PC_IS* pcis = (PC_IS*)pc->data; 2942 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2943 PetscInt i,j,coarse_size; 2944 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 2945 PetscErrorCode ierr; 2946 2947 PetscFunctionBegin; 2948 /* get indices in local ordering for vertices and constraints */ 2949 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 2950 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 2951 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 2952 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2953 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 2954 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 2955 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 2956 2957 /* Compute global number of coarse dofs */ 2958 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2959 2960 /* check numbering */ 2961 if (pcbddc->dbg_flag) { 2962 PetscScalar coarsesum,*array; 2963 2964 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2966 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 2967 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2968 for (i=0;i<pcbddc->local_primal_size;i++) { 2969 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2970 } 2971 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2972 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2973 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2974 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2975 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2976 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2977 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2978 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2979 for (i=0;i<pcis->n;i++) { 2980 if (array[i] == 1.0) { 2981 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 2982 } 2983 } 2984 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2985 for (i=0;i<pcis->n;i++) { 2986 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 2987 } 2988 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2989 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2990 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2991 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2992 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2993 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 2994 if (pcbddc->dbg_flag > 1) { 2995 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 2996 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2998 for (i=0;i<pcbddc->local_primal_size;i++) { 2999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]); 3000 } 3001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3002 } 3003 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3004 } 3005 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3006 /* get back data */ 3007 *coarse_size_n = coarse_size; 3008 *local_primal_indices_n = local_primal_indices; 3009 PetscFunctionReturn(0); 3010 } 3011 3012