1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 pcbddc->n_ISForDofs = 0; 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "PCBDDCCreateWorkVectors" 111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 112 { 113 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 114 PC_IS *pcis = (PC_IS*)pc->data; 115 VecType impVecType; 116 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 117 PetscErrorCode ierr; 118 119 PetscFunctionBegin; 120 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 121 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 122 local_primal_size = n_constraints+n_vertices; 123 n_R = pcis->n-n_vertices; 124 /* local work vectors */ 125 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 126 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 127 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 128 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 129 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 130 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 131 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 132 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 133 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 134 if (n_constraints) { 135 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 136 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 137 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetUpCorrection" 144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 145 { 146 PetscErrorCode ierr; 147 /* pointers to pcis and pcbddc */ 148 PC_IS* pcis = (PC_IS*)pc->data; 149 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 150 /* submatrices of local problem */ 151 Mat A_RV,A_VR,A_VV; 152 /* working matrices */ 153 Mat M1,M2,M3,C_CR; 154 /* working vectors */ 155 Vec vec1_C,vec2_C,vec1_V,vec2_V; 156 /* additional working stuff */ 157 IS is_aux; 158 ISLocalToGlobalMapping BtoNmap; 159 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 160 const PetscScalar *array,*row_cmat_values; 161 const PetscInt *row_cmat_indices,*idx_R_local; 162 PetscInt *vertices,*idx_V_B,*auxindices; 163 PetscInt n_vertices,n_constraints,size_of_constraint; 164 PetscInt i,j,n_R,n_D,n_B; 165 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 166 /* Vector and matrix types */ 167 VecType impVecType; 168 MatType impMatType; 169 /* some shortcuts to scalars */ 170 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 171 /* for debugging purposes */ 172 PetscReal *coarsefunctions_errors,*constraints_errors; 173 174 PetscFunctionBegin; 175 /* get number of vertices and their local indices */ 176 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 177 n_constraints = pcbddc->local_primal_size-n_vertices; 178 /* Set Non-overlapping dimensions */ 179 n_B = pcis->n_B; n_D = pcis->n - n_B; 180 n_R = pcis->n-n_vertices; 181 182 /* Set types for local objects needed by BDDC precondtioner */ 183 impMatType = MATSEQDENSE; 184 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 185 186 /* Allocating some extra storage just to be safe */ 187 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 188 for (i=0;i<pcis->n;i++) auxindices[i]=i; 189 190 /* vertices in boundary numbering */ 191 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 193 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 194 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 195 if (i != n_vertices) { 196 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 197 } 198 199 /* some work vectors on vertices and/or constraints */ 200 if (n_vertices) { 201 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 202 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 203 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 204 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 205 } 206 if (n_constraints) { 207 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 208 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 209 } 210 211 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 212 if (n_constraints) { 213 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 214 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 215 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 216 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 217 218 /* Extract constraints on R nodes: C_{CR} */ 219 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 220 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 221 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 222 223 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 224 for (i=0;i<n_constraints;i++) { 225 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 226 /* Get row of constraint matrix in R numbering */ 227 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 228 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 229 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 230 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 231 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 232 /* Solve for row of constraint matrix in R numbering */ 233 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 234 /* Set values in local_auxmat2 */ 235 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 236 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 237 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 238 } 239 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 240 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 241 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 242 243 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 244 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 245 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 246 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 247 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 248 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 249 ierr = MatSetUp(M1);CHKERRQ(ierr); 250 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 251 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 252 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 253 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 254 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 255 ierr = MatDestroy(&M2);CHKERRQ(ierr); 256 ierr = MatDestroy(&M3);CHKERRQ(ierr); 257 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 258 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 259 } 260 261 /* Get submatrices from subdomain matrix */ 262 if (n_vertices) { 263 PetscInt ibs,mbs; 264 PetscBool issbaij; 265 Mat newmat; 266 267 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 268 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 269 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 270 if (ibs != mbs) { /* need to convert to SEQAIJ */ 271 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 272 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 273 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 274 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 275 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 276 } else { 277 /* this is safe */ 278 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 279 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 280 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 281 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 282 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 283 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 284 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 285 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 286 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 287 } else { 288 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 289 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 290 } 291 } 292 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 293 } 294 295 /* Matrix of coarse basis functions (local) */ 296 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 297 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 298 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 299 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 300 if (pcbddc->switch_static || pcbddc->dbg_flag) { 301 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 302 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 303 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 304 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 305 } 306 307 if (pcbddc->dbg_flag) { 308 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 309 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 310 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 311 } 312 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 313 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 314 315 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 316 317 /* vertices */ 318 for (i=0;i<n_vertices;i++) { 319 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 320 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 321 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 322 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 323 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 324 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 325 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 326 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 327 if (n_constraints) { 328 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 329 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 330 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 331 } 332 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 333 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 334 335 /* Set values in coarse basis function and subdomain part of coarse_mat */ 336 /* coarse basis functions */ 337 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 338 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 339 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 340 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 341 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 342 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 343 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 344 if (pcbddc->switch_static || pcbddc->dbg_flag) { 345 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 346 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 347 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 348 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 350 } 351 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 352 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 353 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 354 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 355 if (n_constraints) { 356 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 357 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 358 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 359 } 360 361 /* check */ 362 if (pcbddc->dbg_flag) { 363 /* assemble subdomain vector on local nodes */ 364 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 365 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 366 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 367 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 368 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 369 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 370 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 371 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 372 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 373 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 374 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 375 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 376 if (n_constraints) { 377 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 378 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 379 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 380 } 381 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 382 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 383 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 384 /* check saddle point solution */ 385 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 386 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 387 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 388 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 389 /* shift by the identity matrix */ 390 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 391 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 392 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 393 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 394 } 395 } 396 397 /* constraints */ 398 for (i=0;i<n_constraints;i++) { 399 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 400 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 401 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 402 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 403 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 404 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 405 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 406 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 407 if (n_vertices) { 408 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 409 } 410 /* Set values in coarse basis function and subdomain part of coarse_mat */ 411 /* coarse basis functions */ 412 j = i+n_vertices; /* don't touch this! */ 413 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 414 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 415 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 416 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 417 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 418 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 419 if (pcbddc->switch_static || pcbddc->dbg_flag) { 420 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 421 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 422 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 423 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 425 } 426 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 427 if (n_vertices) { 428 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 429 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 430 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 } 432 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 433 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 434 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 436 if (pcbddc->dbg_flag) { 437 /* assemble subdomain vector on nodes */ 438 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 439 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 440 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 442 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 443 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 444 /* assemble subdomain vector of lagrange multipliers */ 445 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 446 if (n_vertices) { 447 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 448 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 449 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 450 } 451 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 452 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 453 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 454 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 455 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 456 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 457 /* check saddle point solution */ 458 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 459 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 460 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 461 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 462 /* shift by the identity matrix */ 463 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 464 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 465 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 466 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 467 } 468 } 469 /* call assembling routines for local coarse basis */ 470 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 471 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 472 if (pcbddc->switch_static || pcbddc->dbg_flag) { 473 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 474 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 475 } 476 477 /* compute other basis functions for non-symmetric problems */ 478 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 479 if (!setsym || (setsym && !issym)) { 480 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 481 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 482 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 483 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 484 if (pcbddc->switch_static || pcbddc->dbg_flag) { 485 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 486 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 487 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 488 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 489 } 490 for (i=0;i<pcbddc->local_primal_size;i++) { 491 if (n_constraints) { 492 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 493 for (j=0;j<n_constraints;j++) { 494 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 495 } 496 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 497 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 498 } 499 if (i<n_vertices) { 500 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 501 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 502 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 503 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 504 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 505 if (n_constraints) { 506 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 507 } 508 } else { 509 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 510 } 511 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 512 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 513 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 514 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 515 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 516 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 517 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 518 if (i<n_vertices) { 519 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 520 } 521 if (pcbddc->switch_static || pcbddc->dbg_flag) { 522 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 523 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 524 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 525 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 526 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 527 } 528 529 if (pcbddc->dbg_flag) { 530 /* assemble subdomain vector on nodes */ 531 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 532 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 533 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 534 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 535 if (i<n_vertices) { 536 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 537 } 538 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 539 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 540 /* assemble subdomain vector of lagrange multipliers */ 541 for (j=0;j<pcbddc->local_primal_size;j++) { 542 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 543 } 544 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 545 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 546 /* check saddle point solution */ 547 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 548 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 549 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 550 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 551 /* shift by the identity matrix */ 552 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 553 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 554 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 555 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 556 } 557 } 558 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 560 if (pcbddc->switch_static || pcbddc->dbg_flag) { 561 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 562 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 563 } 564 } 565 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 566 /* Checking coarse_sub_mat and coarse basis functios */ 567 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 568 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 569 if (pcbddc->dbg_flag) { 570 Mat coarse_sub_mat; 571 Mat AUXMAT,TM1,TM2,TM3,TM4; 572 Mat coarse_phi_D,coarse_phi_B; 573 Mat coarse_psi_D,coarse_psi_B; 574 Mat A_II,A_BB,A_IB,A_BI; 575 MatType checkmattype=MATSEQAIJ; 576 PetscReal real_value; 577 578 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 579 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 580 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 581 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 582 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 583 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 584 if (pcbddc->coarse_psi_B) { 585 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 586 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 587 } 588 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 589 590 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 591 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 593 if (pcbddc->coarse_psi_B) { 594 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 595 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 596 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 597 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 598 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 599 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 600 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 601 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 602 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 603 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 604 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 605 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 606 } else { 607 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 608 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 609 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 610 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 611 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 612 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 613 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 614 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 615 } 616 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 617 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 618 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 619 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 620 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 621 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 626 for (i=0;i<pcbddc->local_primal_size;i++) { 627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 628 } 629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 630 for (i=0;i<pcbddc->local_primal_size;i++) { 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 632 } 633 if (pcbddc->coarse_psi_B) { 634 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 635 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 636 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 637 } 638 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 639 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 641 } 642 } 643 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 644 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 645 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 646 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 647 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 648 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 649 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 650 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 651 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 652 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 653 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 654 if (pcbddc->coarse_psi_B) { 655 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 656 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 657 } 658 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 659 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 660 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 661 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 662 } 663 /* free memory */ 664 if (n_vertices) { 665 ierr = PetscFree(vertices);CHKERRQ(ierr); 666 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 667 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 668 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 669 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 670 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 671 } 672 if (n_constraints) { 673 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 674 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 675 ierr = MatDestroy(&M1);CHKERRQ(ierr); 676 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 677 } 678 ierr = PetscFree(auxindices);CHKERRQ(ierr); 679 /* get back data */ 680 *coarse_submat_vals_n = coarse_submat_vals; 681 PetscFunctionReturn(0); 682 } 683 684 #undef __FUNCT__ 685 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 686 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 687 { 688 PC_IS* pcis = (PC_IS*)(pc->data); 689 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 690 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 691 PetscBool issbaij,isbaij; 692 /* manage repeated solves */ 693 MatReuse reuse; 694 MatStructure matstruct; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 /* get mat flags */ 699 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 700 reuse = MAT_INITIAL_MATRIX; 701 if (pc->setupcalled) { 702 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 703 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 704 if (matstruct == SAME_NONZERO_PATTERN) { 705 reuse = MAT_REUSE_MATRIX; 706 } else { 707 reuse = MAT_INITIAL_MATRIX; 708 } 709 } 710 if (reuse == MAT_INITIAL_MATRIX) { 711 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 712 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 713 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 714 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 715 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 716 } 717 718 /* transform local matrices if needed */ 719 if (pcbddc->use_change_of_basis) { 720 Mat change_mat_all; 721 PetscScalar *row_cmat_values; 722 PetscInt *row_cmat_indices; 723 PetscInt *nnz,*is_indices,*temp_indices; 724 PetscInt i,j,k,n_D,n_B; 725 726 /* Get Non-overlapping dimensions */ 727 n_B = pcis->n_B; 728 n_D = pcis->n-n_B; 729 730 /* compute nonzero structure of change of basis on all local nodes */ 731 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 732 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 733 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 734 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 735 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 736 k=1; 737 for (i=0;i<n_B;i++) { 738 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 739 nnz[is_indices[i]]=j; 740 if (k < j) k = j; 741 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 742 } 743 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 744 /* assemble change of basis matrix on the whole set of local dofs */ 745 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 746 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 747 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 748 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 749 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 750 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 751 for (i=0;i<n_D;i++) { 752 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 753 } 754 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 755 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 756 for (i=0;i<n_B;i++) { 757 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 758 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 759 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 760 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 761 } 762 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 763 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 764 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 765 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 766 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 767 if (!issbaij && !isbaij) { 768 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 769 } else { 770 Mat work_mat; 771 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 772 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 773 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 774 } 775 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 776 ierr = PetscFree(nnz);CHKERRQ(ierr); 777 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 778 } else { 779 /* without change of basis, the local matrix is unchanged */ 780 if (!pcbddc->local_mat) { 781 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 782 pcbddc->local_mat = matis->A; 783 } 784 } 785 786 /* get submatrices */ 787 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 788 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 789 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 790 if (!issbaij) { 791 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 792 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 793 } else { 794 Mat newmat; 795 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 796 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 797 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 798 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 799 } 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNCT__ 804 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 805 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 806 { 807 PC_IS* pcis = (PC_IS*)(pc->data); 808 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 809 IS is_aux1,is_aux2; 810 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 811 PetscInt n_vertices,i,j,n_R,n_D,n_B; 812 PetscInt vbs,bs; 813 PetscBT bitmask; 814 PetscErrorCode ierr; 815 816 PetscFunctionBegin; 817 /* Set Non-overlapping dimensions */ 818 n_B = pcis->n_B; n_D = pcis->n - n_B; 819 /* get vertex indices from constraint matrix */ 820 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 821 822 /* create auxiliary bitmask */ 823 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 824 for (i=0;i<n_vertices;i++) { 825 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 826 } 827 828 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 829 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 830 for (i=0, n_R=0; i<pcis->n; i++) { 831 if (!PetscBTLookup(bitmask,i)) { 832 idx_R_local[n_R] = i; 833 n_R++; 834 } 835 } 836 837 /* Block code */ 838 vbs = 1; 839 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 840 if (bs>1 && !(n_vertices%bs)) { 841 PetscBool is_blocked = PETSC_TRUE; 842 PetscInt *vary; 843 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 844 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 845 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 846 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 847 for (i=0; i<n_vertices; i++) { 848 if (vary[i]!=0 && vary[i]!=bs) { 849 is_blocked = PETSC_FALSE; 850 break; 851 } 852 } 853 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 854 vbs = bs; 855 for (i=0;i<n_R/vbs;i++) { 856 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 857 } 858 } 859 ierr = PetscFree(vary);CHKERRQ(ierr); 860 } 861 ierr = PetscFree(vertices);CHKERRQ(ierr); 862 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 863 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 864 865 /* print some info if requested */ 866 if (pcbddc->dbg_flag) { 867 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 869 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 870 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 872 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 873 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 874 } 875 876 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 877 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 878 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 879 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 880 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 881 for (i=0; i<n_D; i++) { 882 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 883 } 884 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 885 for (i=0, j=0; i<n_R; i++) { 886 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 887 aux_array1[j++] = i; 888 } 889 } 890 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 891 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 892 for (i=0, j=0; i<n_B; i++) { 893 if (!PetscBTLookup(bitmask,is_indices[i])) { 894 aux_array2[j++] = i; 895 } 896 } 897 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 898 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 899 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 900 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 901 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 902 903 if (pcbddc->switch_static || pcbddc->dbg_flag) { 904 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 905 for (i=0, j=0; i<n_R; i++) { 906 if (PetscBTLookup(bitmask,idx_R_local[i])) { 907 aux_array1[j++] = i; 908 } 909 } 910 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 911 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 912 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 913 } 914 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 915 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 919 920 #undef __FUNCT__ 921 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 922 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 923 { 924 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 925 PC_IS *pcis = (PC_IS*)pc->data; 926 PC pc_temp; 927 Mat A_RR; 928 Vec vec1,vec2,vec3; 929 MatStructure matstruct; 930 PetscScalar m_one = -1.0; 931 PetscReal value; 932 PetscInt n_D,n_R,ibs,mbs; 933 PetscBool use_exact,use_exact_reduced,issbaij; 934 PetscErrorCode ierr; 935 /* prefixes stuff */ 936 char dir_prefix[256],neu_prefix[256],str_level[3]; 937 size_t len; 938 939 PetscFunctionBegin; 940 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 941 942 /* compute prefixes */ 943 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 944 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 945 if (!pcbddc->current_level) { 946 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 947 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 948 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 949 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 950 } else { 951 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 952 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 953 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 954 len -= 15; /* remove "pc_bddc_coarse_" */ 955 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 956 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 957 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 958 *(dir_prefix+len)='\0'; 959 *(neu_prefix+len)='\0'; 960 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 961 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 962 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 963 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 964 } 965 966 /* DIRICHLET PROBLEM */ 967 /* Matrix for Dirichlet problem is pcis->A_II */ 968 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 969 if (!pcbddc->ksp_D) { /* create object if not yet build */ 970 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 971 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 972 /* default */ 973 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 974 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 975 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 976 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 977 if (issbaij) { 978 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 979 } else { 980 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 981 } 982 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 983 } 984 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 985 /* Allow user's customization */ 986 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 987 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 988 if (!n_D) { 989 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 990 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 991 } 992 /* Set Up KSP for Dirichlet problem of BDDC */ 993 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 994 /* set ksp_D into pcis data */ 995 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 996 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 997 pcis->ksp_D = pcbddc->ksp_D; 998 999 /* NEUMANN PROBLEM */ 1000 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1001 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1002 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1003 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1004 if (ibs != mbs) { 1005 Mat newmat; 1006 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1007 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1008 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1009 } else { 1010 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1011 } 1012 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1013 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1014 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1015 /* default */ 1016 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1017 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1018 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1019 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1020 if (issbaij) { 1021 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1022 } else { 1023 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1024 } 1025 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1026 } 1027 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1028 /* Allow user's customization */ 1029 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1030 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1031 if (!n_R) { 1032 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1033 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1034 } 1035 /* Set Up KSP for Neumann problem of BDDC */ 1036 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1037 1038 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1039 1040 /* Dirichlet */ 1041 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1042 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1043 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1044 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1045 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1046 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1047 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1048 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1049 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1050 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1051 /* need to be adapted? */ 1052 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1053 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1054 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1055 /* print info */ 1056 if (pcbddc->dbg_flag) { 1057 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1058 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1059 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1060 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1061 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1062 } 1063 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1064 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1065 } 1066 1067 /* Neumann */ 1068 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1069 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1070 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1071 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1072 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1073 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1074 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1075 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1076 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1077 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1078 /* need to be adapted? */ 1079 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1080 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1081 /* print info */ 1082 if (pcbddc->dbg_flag) { 1083 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1084 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1085 } 1086 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1087 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1088 } 1089 /* free Neumann problem's matrix */ 1090 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1096 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1097 { 1098 PetscErrorCode ierr; 1099 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1100 1101 PetscFunctionBegin; 1102 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1103 if (pcbddc->local_auxmat1) { 1104 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1105 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1106 } 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1112 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1113 { 1114 PetscErrorCode ierr; 1115 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1116 PC_IS* pcis = (PC_IS*) (pc->data); 1117 const PetscScalar zero = 0.0; 1118 1119 PetscFunctionBegin; 1120 /* Application of PHI^T (or PSI^T) */ 1121 if (pcbddc->coarse_psi_B) { 1122 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1123 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1124 } else { 1125 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1126 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1127 } 1128 /* Scatter data of coarse_rhs */ 1129 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1130 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1131 1132 /* Local solution on R nodes */ 1133 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1134 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1135 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1136 if (pcbddc->switch_static) { 1137 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1139 } 1140 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1141 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1142 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1143 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144 if (pcbddc->switch_static) { 1145 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1146 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1147 } 1148 1149 /* Coarse solution */ 1150 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1151 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1152 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1153 } 1154 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1156 1157 /* Sum contributions from two levels */ 1158 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1159 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1165 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1166 { 1167 PetscErrorCode ierr; 1168 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1169 1170 PetscFunctionBegin; 1171 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1177 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1178 { 1179 PetscErrorCode ierr; 1180 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1181 1182 PetscFunctionBegin; 1183 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1184 PetscFunctionReturn(0); 1185 } 1186 1187 /* uncomment for testing purposes */ 1188 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1191 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1192 { 1193 PetscErrorCode ierr; 1194 PC_IS* pcis = (PC_IS*)(pc->data); 1195 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1196 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1197 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1198 MatType impMatType=MATSEQAIJ; 1199 /* one and zero */ 1200 PetscScalar one=1.0,zero=0.0; 1201 /* space to store constraints and their local indices */ 1202 PetscScalar *temp_quadrature_constraint; 1203 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1204 /* iterators */ 1205 PetscInt i,j,k,total_counts,temp_start_ptr; 1206 /* stuff to store connected components stored in pcbddc->mat_graph */ 1207 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1208 PetscInt n_ISForFaces,n_ISForEdges; 1209 /* near null space stuff */ 1210 MatNullSpace nearnullsp; 1211 const Vec *nearnullvecs; 1212 Vec *localnearnullsp; 1213 PetscBool nnsp_has_cnst; 1214 PetscInt nnsp_size; 1215 PetscScalar *array; 1216 /* BLAS integers */ 1217 PetscBLASInt lwork,lierr; 1218 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1219 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1220 /* LAPACK working arrays for SVD or POD */ 1221 PetscBool skip_lapack; 1222 PetscScalar *work; 1223 PetscReal *singular_vals; 1224 #if defined(PETSC_USE_COMPLEX) 1225 PetscReal *rwork; 1226 #endif 1227 #if defined(PETSC_MISSING_LAPACK_GESVD) 1228 PetscBLASInt Blas_one_2=1; 1229 PetscScalar *temp_basis,*correlation_mat; 1230 #else 1231 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1232 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1233 #endif 1234 /* change of basis */ 1235 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1236 PetscBool boolforchange,qr_needed; 1237 PetscBT touched,change_basis,qr_needed_idx; 1238 /* auxiliary stuff */ 1239 PetscInt *nnz,*is_indices,*local_to_B; 1240 /* some quantities */ 1241 PetscInt n_vertices,total_primal_vertices; 1242 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1243 1244 1245 PetscFunctionBegin; 1246 /* Get index sets for faces, edges and vertices from graph */ 1247 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1248 pcbddc->use_vertices = PETSC_TRUE; 1249 } 1250 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1251 /* HACK: provide functions to set change of basis */ 1252 if (!ISForVertices && pcbddc->NullSpace) { 1253 pcbddc->use_change_of_basis = PETSC_TRUE; 1254 pcbddc->use_change_on_faces = PETSC_FALSE; 1255 } 1256 /* print some info */ 1257 if (pcbddc->dbg_flag) { 1258 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1259 i = 0; 1260 if (ISForVertices) { 1261 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1262 } 1263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1266 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1267 } 1268 /* check if near null space is attached to global mat */ 1269 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1270 if (nearnullsp) { 1271 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1272 } else { /* if near null space is not provided BDDC uses constants by default */ 1273 nnsp_size = 0; 1274 nnsp_has_cnst = PETSC_TRUE; 1275 } 1276 /* get max number of constraints on a single cc */ 1277 max_constraints = nnsp_size; 1278 if (nnsp_has_cnst) max_constraints++; 1279 1280 /* 1281 Evaluate maximum storage size needed by the procedure 1282 - temp_indices will contain start index of each constraint stored as follows 1283 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1284 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1285 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1286 */ 1287 total_counts = n_ISForFaces+n_ISForEdges; 1288 total_counts *= max_constraints; 1289 n_vertices = 0; 1290 if (ISForVertices) { 1291 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1292 } 1293 total_counts += n_vertices; 1294 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1295 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1296 total_counts = 0; 1297 max_size_of_constraint = 0; 1298 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1299 if (i<n_ISForEdges) { 1300 used_IS = &ISForEdges[i]; 1301 } else { 1302 used_IS = &ISForFaces[i-n_ISForEdges]; 1303 } 1304 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1305 total_counts += j; 1306 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1307 } 1308 total_counts *= max_constraints; 1309 total_counts += n_vertices; 1310 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1311 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1312 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1313 /* local to boundary numbering */ 1314 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1315 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1316 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1317 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1318 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1319 /* get local part of global near null space vectors */ 1320 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1321 for (k=0;k<nnsp_size;k++) { 1322 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1323 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1324 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1325 } 1326 1327 /* whether or not to skip lapack calls */ 1328 skip_lapack = PETSC_TRUE; 1329 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1330 1331 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1332 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1333 PetscScalar temp_work; 1334 #if defined(PETSC_MISSING_LAPACK_GESVD) 1335 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1336 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1337 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1338 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1339 #if defined(PETSC_USE_COMPLEX) 1340 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1341 #endif 1342 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1343 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1344 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1345 lwork = -1; 1346 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1347 #if !defined(PETSC_USE_COMPLEX) 1348 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1349 #else 1350 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1351 #endif 1352 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1353 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1354 #else /* on missing GESVD */ 1355 /* SVD */ 1356 PetscInt max_n,min_n; 1357 max_n = max_size_of_constraint; 1358 min_n = max_constraints; 1359 if (max_size_of_constraint < max_constraints) { 1360 min_n = max_size_of_constraint; 1361 max_n = max_constraints; 1362 } 1363 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1364 #if defined(PETSC_USE_COMPLEX) 1365 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1366 #endif 1367 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1368 lwork = -1; 1369 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1370 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1371 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1372 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1373 #if !defined(PETSC_USE_COMPLEX) 1374 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1375 #else 1376 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1377 #endif 1378 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1379 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1380 #endif /* on missing GESVD */ 1381 /* Allocate optimal workspace */ 1382 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1383 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1384 } 1385 /* Now we can loop on constraining sets */ 1386 total_counts = 0; 1387 temp_indices[0] = 0; 1388 /* vertices */ 1389 if (ISForVertices) { 1390 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1391 if (nnsp_has_cnst) { /* consider all vertices */ 1392 for (i=0;i<n_vertices;i++) { 1393 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1394 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1395 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1396 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1397 total_counts++; 1398 } 1399 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1400 PetscBool used_vertex; 1401 for (i=0;i<n_vertices;i++) { 1402 used_vertex = PETSC_FALSE; 1403 k = 0; 1404 while (!used_vertex && k<nnsp_size) { 1405 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1406 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1407 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1408 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1409 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1410 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1411 total_counts++; 1412 used_vertex = PETSC_TRUE; 1413 } 1414 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1415 k++; 1416 } 1417 } 1418 } 1419 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1420 n_vertices = total_counts; 1421 } 1422 1423 /* edges and faces */ 1424 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1425 if (i<n_ISForEdges) { 1426 used_IS = &ISForEdges[i]; 1427 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1428 } else { 1429 used_IS = &ISForFaces[i-n_ISForEdges]; 1430 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1431 } 1432 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1433 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1434 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1435 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1436 /* change of basis should not be performed on local periodic nodes */ 1437 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1438 if (nnsp_has_cnst) { 1439 PetscScalar quad_value; 1440 temp_constraints++; 1441 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1442 for (j=0;j<size_of_constraint;j++) { 1443 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1444 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1445 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1446 } 1447 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1448 if (boolforchange) { 1449 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1450 } 1451 total_counts++; 1452 } 1453 for (k=0;k<nnsp_size;k++) { 1454 PetscReal real_value; 1455 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1456 for (j=0;j<size_of_constraint;j++) { 1457 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1458 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1459 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1460 } 1461 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1462 /* check if array is null on the connected component */ 1463 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1464 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1465 if (real_value > 0.0) { /* keep indices and values */ 1466 temp_constraints++; 1467 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1468 if (boolforchange) { 1469 ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr); 1470 } 1471 total_counts++; 1472 } 1473 } 1474 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1475 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1476 if (!pcbddc->use_nnsp_true && temp_constraints) { 1477 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1478 1479 #if defined(PETSC_MISSING_LAPACK_GESVD) 1480 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1481 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1482 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1483 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1484 from that computed using LAPACKgesvd 1485 -> This is due to a different computation of eigenvectors in LAPACKheev 1486 -> The quality of the POD-computed basis will be the same */ 1487 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1488 /* Store upper triangular part of correlation matrix */ 1489 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1490 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1491 for (j=0;j<temp_constraints;j++) { 1492 for (k=0;k<j+1;k++) { 1493 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1494 } 1495 } 1496 /* compute eigenvalues and eigenvectors of correlation matrix */ 1497 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1498 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1499 #if !defined(PETSC_USE_COMPLEX) 1500 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1501 #else 1502 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1503 #endif 1504 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1505 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1506 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1507 j=0; 1508 while (j < temp_constraints && singular_vals[j] < tol) j++; 1509 total_counts=total_counts-j; 1510 /* scale and copy POD basis into used quadrature memory */ 1511 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1512 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1513 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1515 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1516 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1517 if (j<temp_constraints) { 1518 PetscInt ii; 1519 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1520 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1521 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1522 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1523 for (k=0;k<temp_constraints-j;k++) { 1524 for (ii=0;ii<size_of_constraint;ii++) { 1525 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1526 } 1527 } 1528 } 1529 #else /* on missing GESVD */ 1530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1531 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1533 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1534 #if !defined(PETSC_USE_COMPLEX) 1535 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1536 #else 1537 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1538 #endif 1539 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1540 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1541 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1542 k = temp_constraints; 1543 if (k > size_of_constraint) k = size_of_constraint; 1544 j = 0; 1545 while (j < k && singular_vals[k-j-1] < tol) j++; 1546 total_counts = total_counts-temp_constraints+k-j; 1547 #endif /* on missing GESVD */ 1548 } 1549 } 1550 /* free index sets of faces, edges and vertices */ 1551 for (i=0;i<n_ISForFaces;i++) { 1552 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1553 } 1554 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1555 for (i=0;i<n_ISForEdges;i++) { 1556 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1557 } 1558 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1559 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1560 1561 /* free workspace */ 1562 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1563 ierr = PetscFree(work);CHKERRQ(ierr); 1564 #if defined(PETSC_USE_COMPLEX) 1565 ierr = PetscFree(rwork);CHKERRQ(ierr); 1566 #endif 1567 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1568 #if defined(PETSC_MISSING_LAPACK_GESVD) 1569 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1570 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1571 #endif 1572 } 1573 for (k=0;k<nnsp_size;k++) { 1574 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1575 } 1576 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1577 1578 /* set quantities in pcbddc data structure */ 1579 /* n_vertices defines the number of subdomain corners in the primal space */ 1580 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1581 pcbddc->local_primal_size = total_counts; 1582 pcbddc->n_vertices = n_vertices; 1583 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1584 1585 /* Create constraint matrix */ 1586 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1587 /* so we need to set it up properly either with or without change of basis */ 1588 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1589 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1590 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1591 /* array to compute a local numbering of constraints : vertices first then constraints */ 1592 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1593 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1594 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1595 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1596 /* auxiliary stuff for basis change */ 1597 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1598 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1599 1600 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1601 total_primal_vertices=0; 1602 for (i=0;i<pcbddc->local_primal_size;i++) { 1603 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1604 if (size_of_constraint == 1) { 1605 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1606 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1607 aux_primal_minloc[total_primal_vertices]=0; 1608 total_primal_vertices++; 1609 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1610 PetscInt min_loc,min_index; 1611 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1612 /* find first untouched local node */ 1613 k = 0; 1614 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1615 min_index = global_indices[k]; 1616 min_loc = k; 1617 /* search the minimum among global nodes already untouched on the cc */ 1618 for (k=1;k<size_of_constraint;k++) { 1619 /* there can be more than one constraint on a single connected component */ 1620 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1621 min_index = global_indices[k]; 1622 min_loc = k; 1623 } 1624 } 1625 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1626 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1627 aux_primal_minloc[total_primal_vertices]=min_loc; 1628 total_primal_vertices++; 1629 } 1630 } 1631 /* determine if a QR strategy is needed for change of basis */ 1632 qr_needed = PETSC_FALSE; 1633 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1634 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1635 if (PetscBTLookup(change_basis,i)) { 1636 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1637 j = 0; 1638 for (k=0;k<size_of_constraint;k++) { 1639 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1640 j++; 1641 } 1642 } 1643 /* found more than one primal dof on the cc */ 1644 if (j > 1) { 1645 PetscBTSet(qr_needed_idx,i); 1646 qr_needed = PETSC_TRUE; 1647 } 1648 } 1649 } 1650 /* free workspace */ 1651 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1652 /* permute indices in order to have a sorted set of vertices */ 1653 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1654 1655 /* nonzero structure of constraint matrix */ 1656 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1657 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1658 j=total_primal_vertices; 1659 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1660 if (!PetscBTLookup(change_basis,i)) { 1661 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1662 j++; 1663 } 1664 } 1665 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1666 ierr = PetscFree(nnz);CHKERRQ(ierr); 1667 /* set values in constraint matrix */ 1668 for (i=0;i<total_primal_vertices;i++) { 1669 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1670 } 1671 total_counts = total_primal_vertices; 1672 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1673 if (!PetscBTLookup(change_basis,i)) { 1674 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1675 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1676 total_counts++; 1677 } 1678 } 1679 /* assembling */ 1680 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1682 /* 1683 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1684 */ 1685 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1686 if (pcbddc->use_change_of_basis) { 1687 /* dual and primal dofs on a single cc */ 1688 PetscInt dual_dofs,primal_dofs; 1689 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1690 PetscInt primal_counter; 1691 /* working stuff for GEQRF */ 1692 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1693 PetscBLASInt lqr_work; 1694 /* working stuff for UNGQR */ 1695 PetscScalar *gqr_work,lgqr_work_t; 1696 PetscBLASInt lgqr_work; 1697 /* working stuff for TRTRS */ 1698 PetscScalar *trs_rhs; 1699 PetscBLASInt Blas_NRHS; 1700 /* pointers for values insertion into change of basis matrix */ 1701 PetscInt *start_rows,*start_cols; 1702 PetscScalar *start_vals; 1703 /* working stuff for values insertion */ 1704 PetscBT is_primal; 1705 1706 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1707 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1708 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1709 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1710 /* work arrays */ 1711 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1712 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1713 /* nonzeros per row */ 1714 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1715 if (PetscBTLookup(change_basis,i)) { 1716 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1717 if (PetscBTLookup(qr_needed_idx,i)) { 1718 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1719 } else { 1720 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1721 /* get local primal index on the cc */ 1722 j = 0; 1723 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1724 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1725 } 1726 } 1727 } 1728 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1729 ierr = PetscFree(nnz);CHKERRQ(ierr); 1730 /* Set initial identity in the matrix */ 1731 for (i=0;i<pcis->n_B;i++) { 1732 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1733 } 1734 1735 if (pcbddc->dbg_flag) { 1736 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1737 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1738 } 1739 1740 1741 /* Now we loop on the constraints which need a change of basis */ 1742 /* 1743 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1744 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1745 1746 Basic blocks of change of basis matrix T computed 1747 1748 - Using the following block transformation if there is only a primal dof on the cc 1749 (in the example, primal dof is the last one of the edge in LOCAL ordering 1750 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1751 | 1 0 ... 0 1 | 1752 | 0 1 ... 0 1 | 1753 | ... | 1754 | 0 ... 1 1 | 1755 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1756 1757 - via QR decomposition of constraints otherwise 1758 */ 1759 if (qr_needed) { 1760 /* space to store Q */ 1761 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1762 /* first we issue queries for optimal work */ 1763 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1764 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1765 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1766 lqr_work = -1; 1767 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1768 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1769 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1770 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1771 lgqr_work = -1; 1772 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1773 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1774 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1775 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1776 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1777 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1778 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1779 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1780 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1781 /* array to store scaling factors for reflectors */ 1782 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1783 /* array to store rhs and solution of triangular solver */ 1784 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1785 /* allocating workspace for check */ 1786 if (pcbddc->dbg_flag) { 1787 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1788 } 1789 } 1790 /* array to store whether a node is primal or not */ 1791 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1792 for (i=0;i<total_primal_vertices;i++) { 1793 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1794 } 1795 1796 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1797 /* -> using implicit ordering contained in temp_indices data */ 1798 total_counts = pcbddc->n_vertices; 1799 primal_counter = total_counts; 1800 while (total_counts<pcbddc->local_primal_size) { 1801 primal_dofs = 1; 1802 if (PetscBTLookup(change_basis,total_counts)) { 1803 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1804 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1805 primal_dofs++; 1806 } 1807 /* get constraint info */ 1808 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1809 dual_dofs = size_of_constraint-primal_dofs; 1810 1811 if (pcbddc->dbg_flag) { 1812 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1813 } 1814 1815 if (primal_dofs > 1) { /* QR */ 1816 1817 /* copy quadrature constraints for change of basis check */ 1818 if (pcbddc->dbg_flag) { 1819 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1820 } 1821 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1822 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1823 1824 /* compute QR decomposition of constraints */ 1825 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1826 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1827 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1828 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1829 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1830 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1831 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1832 1833 /* explictly compute R^-T */ 1834 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1835 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1836 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1837 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1838 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1839 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1840 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1841 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1842 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1843 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1844 1845 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1846 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1847 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1848 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1849 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1850 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1851 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1852 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1853 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1854 1855 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1856 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1857 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1858 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1859 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1860 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1861 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1862 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1863 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1864 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1865 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1866 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1867 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1868 1869 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1870 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1871 /* insert cols for primal dofs */ 1872 for (j=0;j<primal_dofs;j++) { 1873 start_vals = &qr_basis[j*size_of_constraint]; 1874 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1875 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1876 } 1877 /* insert cols for dual dofs */ 1878 for (j=0,k=0;j<dual_dofs;k++) { 1879 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1880 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1881 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1882 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1883 j++; 1884 } 1885 } 1886 1887 /* check change of basis */ 1888 if (pcbddc->dbg_flag) { 1889 PetscInt ii,jj; 1890 PetscBool valid_qr=PETSC_TRUE; 1891 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1892 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1893 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1894 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1895 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1896 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1897 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1898 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1899 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1900 for (jj=0;jj<size_of_constraint;jj++) { 1901 for (ii=0;ii<primal_dofs;ii++) { 1902 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1903 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1904 } 1905 } 1906 if (!valid_qr) { 1907 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1908 for (jj=0;jj<size_of_constraint;jj++) { 1909 for (ii=0;ii<primal_dofs;ii++) { 1910 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1911 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1912 } 1913 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1914 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1915 } 1916 } 1917 } 1918 } else { 1919 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1920 } 1921 } 1922 } else { /* simple transformation block */ 1923 PetscInt row,col; 1924 PetscScalar val; 1925 for (j=0;j<size_of_constraint;j++) { 1926 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1927 if (!PetscBTLookup(is_primal,row)) { 1928 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1929 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1930 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1931 } else { 1932 for (k=0;k<size_of_constraint;k++) { 1933 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1934 if (row != col) { 1935 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1936 } else { 1937 val = 1.0; 1938 } 1939 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1940 } 1941 } 1942 } 1943 } 1944 /* increment primal counter */ 1945 primal_counter += primal_dofs; 1946 } else { 1947 if (pcbddc->dbg_flag) { 1948 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1949 } 1950 } 1951 /* increment constraint counter total_counts */ 1952 total_counts += primal_dofs; 1953 } 1954 1955 /* free workspace */ 1956 if (qr_needed) { 1957 if (pcbddc->dbg_flag) { 1958 ierr = PetscFree(work);CHKERRQ(ierr); 1959 } 1960 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1961 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1962 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1963 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1964 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1965 } 1966 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1967 /* assembling */ 1968 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1969 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1970 /* 1971 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1972 */ 1973 } 1974 1975 /* flush dbg viewer */ 1976 if (pcbddc->dbg_flag) { 1977 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1978 } 1979 1980 /* free workspace */ 1981 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1982 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 1983 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1984 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1985 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1986 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1987 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1988 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1989 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1990 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1991 PetscFunctionReturn(0); 1992 } 1993 1994 #undef __FUNCT__ 1995 #define __FUNCT__ "PCBDDCAnalyzeInterface" 1996 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 1997 { 1998 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1999 PC_IS *pcis = (PC_IS*)pc->data; 2000 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2001 PetscInt bs,ierr,i,vertex_size; 2002 PetscViewer viewer=pcbddc->dbg_viewer; 2003 2004 PetscFunctionBegin; 2005 /* Init local Graph struct */ 2006 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2007 2008 /* Check validity of the csr graph passed in by the user */ 2009 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2010 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2011 } 2012 2013 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2014 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2015 Mat mat_adj; 2016 const PetscInt *xadj,*adjncy; 2017 PetscBool flg_row=PETSC_TRUE; 2018 2019 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2020 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2021 if (!flg_row) { 2022 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2023 } 2024 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2025 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2026 if (!flg_row) { 2027 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2028 } 2029 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2030 } 2031 2032 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2033 vertex_size = 1; 2034 if (!pcbddc->user_provided_isfordofs) { 2035 if (!pcbddc->n_ISForDofs) { 2036 IS *custom_ISForDofs; 2037 2038 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2039 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2040 for (i=0;i<bs;i++) { 2041 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2042 } 2043 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2044 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2045 /* remove my references to IS objects */ 2046 for (i=0;i<bs;i++) { 2047 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2048 } 2049 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2050 } 2051 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2052 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2053 } 2054 2055 /* Setup of Graph */ 2056 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2057 2058 /* Graph's connected components analysis */ 2059 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2060 2061 /* print some info to stdout */ 2062 if (pcbddc->dbg_flag) { 2063 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2064 } 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2070 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2071 { 2072 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2073 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2074 PetscErrorCode ierr; 2075 2076 PetscFunctionBegin; 2077 n = 0; 2078 vertices = 0; 2079 if (pcbddc->ConstraintMatrix) { 2080 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2081 for (i=0;i<local_primal_size;i++) { 2082 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2083 if (size_of_constraint == 1) n++; 2084 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2085 } 2086 if (vertices_idx) { 2087 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2088 n = 0; 2089 for (i=0;i<local_primal_size;i++) { 2090 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2091 if (size_of_constraint == 1) { 2092 vertices[n++]=row_cmat_indices[0]; 2093 } 2094 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2095 } 2096 } 2097 } 2098 *n_vertices = n; 2099 if (vertices_idx) *vertices_idx = vertices; 2100 PetscFunctionReturn(0); 2101 } 2102 2103 #undef __FUNCT__ 2104 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2105 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2106 { 2107 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2108 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2109 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2110 PetscBT touched; 2111 PetscErrorCode ierr; 2112 2113 /* This function assumes that the number of local constraints per connected component 2114 is not greater than the number of nodes defined for the connected component 2115 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2116 PetscFunctionBegin; 2117 n = 0; 2118 constraints_index = 0; 2119 if (pcbddc->ConstraintMatrix) { 2120 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2121 max_size_of_constraint = 0; 2122 for (i=0;i<local_primal_size;i++) { 2123 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2124 if (size_of_constraint > 1) { 2125 n++; 2126 } 2127 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2128 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2129 } 2130 if (constraints_idx) { 2131 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2132 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2133 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2134 n = 0; 2135 for (i=0;i<local_primal_size;i++) { 2136 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2137 if (size_of_constraint > 1) { 2138 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2139 /* find first untouched local node */ 2140 j = 0; 2141 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2142 min_index = row_cmat_global_indices[j]; 2143 min_loc = j; 2144 /* search the minimum among nodes not yet touched on the connected component 2145 since there can be more than one constraint on a single cc */ 2146 for (j=1;j<size_of_constraint;j++) { 2147 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2148 min_index = row_cmat_global_indices[j]; 2149 min_loc = j; 2150 } 2151 } 2152 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2153 constraints_index[n++] = row_cmat_indices[min_loc]; 2154 } 2155 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2156 } 2157 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2158 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2159 } 2160 } 2161 *n_constraints = n; 2162 if (constraints_idx) *constraints_idx = constraints_index; 2163 PetscFunctionReturn(0); 2164 } 2165 2166 /* the next two functions has been adapted from pcis.c */ 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "PCBDDCApplySchur" 2169 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2170 { 2171 PetscErrorCode ierr; 2172 PC_IS *pcis = (PC_IS*)(pc->data); 2173 2174 PetscFunctionBegin; 2175 if (!vec2_B) { vec2_B = v; } 2176 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2177 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2178 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2179 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2180 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2181 PetscFunctionReturn(0); 2182 } 2183 2184 #undef __FUNCT__ 2185 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2186 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2187 { 2188 PetscErrorCode ierr; 2189 PC_IS *pcis = (PC_IS*)(pc->data); 2190 2191 PetscFunctionBegin; 2192 if (!vec2_B) { vec2_B = v; } 2193 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2194 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2195 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2196 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2197 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "PCBDDCSubsetNumbering" 2203 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2204 { 2205 Vec local_vec,global_vec; 2206 IS seqis,paris; 2207 VecScatter scatter_ctx; 2208 PetscScalar *array; 2209 PetscInt *temp_global_dofs; 2210 PetscScalar globalsum; 2211 PetscInt i,j,s; 2212 PetscInt nlocals,first_index,old_index,max_local; 2213 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2214 PetscMPIInt *dof_sizes,*dof_displs; 2215 PetscBool first_found; 2216 PetscErrorCode ierr; 2217 2218 PetscFunctionBegin; 2219 /* mpi buffers */ 2220 MPI_Comm_size(comm,&size_prec_comm); 2221 MPI_Comm_rank(comm,&rank_prec_comm); 2222 j = ( !rank_prec_comm ? size_prec_comm : 0); 2223 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2224 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2225 /* get maximum size of subset */ 2226 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2227 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2228 max_local = 0; 2229 if (n_local_dofs) { 2230 max_local = temp_global_dofs[0]; 2231 for (i=1;i<n_local_dofs;i++) { 2232 if (max_local < temp_global_dofs[i] ) { 2233 max_local = temp_global_dofs[i]; 2234 } 2235 } 2236 } 2237 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2238 max_global++; 2239 max_local = 0; 2240 if (n_local_dofs) { 2241 max_local = local_dofs[0]; 2242 for (i=1;i<n_local_dofs;i++) { 2243 if (max_local < local_dofs[i] ) { 2244 max_local = local_dofs[i]; 2245 } 2246 } 2247 } 2248 max_local++; 2249 /* allocate workspace */ 2250 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2251 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2252 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2253 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2254 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2255 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2256 /* create scatter */ 2257 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2258 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2259 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2260 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2261 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2262 /* init array */ 2263 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2264 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2265 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2266 if (local_dofs_mult) { 2267 for (i=0;i<n_local_dofs;i++) { 2268 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2269 } 2270 } else { 2271 for (i=0;i<n_local_dofs;i++) { 2272 array[local_dofs[i]]=1.0; 2273 } 2274 } 2275 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2276 /* scatter into global vec and get total number of global dofs */ 2277 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2278 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2279 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2280 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2281 /* Fill global_vec with cumulative function for global numbering */ 2282 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2283 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2284 nlocals = 0; 2285 first_index = -1; 2286 first_found = PETSC_FALSE; 2287 for (i=0;i<s;i++) { 2288 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2289 first_found = PETSC_TRUE; 2290 first_index = i; 2291 } 2292 nlocals += (PetscInt)PetscRealPart(array[i]); 2293 } 2294 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2295 if (!rank_prec_comm) { 2296 dof_displs[0]=0; 2297 for (i=1;i<size_prec_comm;i++) { 2298 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2299 } 2300 } 2301 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2302 if (first_found) { 2303 array[first_index] += (PetscScalar)nlocals; 2304 old_index = first_index; 2305 for (i=first_index+1;i<s;i++) { 2306 if (PetscRealPart(array[i]) > 0.0) { 2307 array[i] += array[old_index]; 2308 old_index = i; 2309 } 2310 } 2311 } 2312 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2313 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2314 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2315 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2316 /* get global ordering of local dofs */ 2317 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2318 if (local_dofs_mult) { 2319 for (i=0;i<n_local_dofs;i++) { 2320 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2321 } 2322 } else { 2323 for (i=0;i<n_local_dofs;i++) { 2324 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2325 } 2326 } 2327 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2328 /* free workspace */ 2329 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2330 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2331 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2332 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2333 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2334 /* return pointer to global ordering of local dofs */ 2335 *global_numbering_subset = temp_global_dofs; 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2341 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2342 { 2343 PetscInt i,j; 2344 PetscScalar *alphas; 2345 PetscErrorCode ierr; 2346 2347 PetscFunctionBegin; 2348 /* this implements stabilized Gram-Schmidt */ 2349 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2350 for (i=0;i<n;i++) { 2351 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2352 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2353 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2354 } 2355 ierr = PetscFree(alphas);CHKERRQ(ierr); 2356 PetscFunctionReturn(0); 2357 } 2358 2359 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2360 #undef __FUNCT__ 2361 #define __FUNCT__ "MatConvert_IS_AIJ" 2362 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2363 { 2364 Mat new_mat; 2365 Mat_IS *matis = (Mat_IS*)(mat->data); 2366 /* info on mat */ 2367 PetscInt bs,rows,cols; 2368 PetscInt lrows,lcols; 2369 PetscInt local_rows,local_cols; 2370 PetscMPIInt nsubdomains; 2371 /* preallocation */ 2372 Vec vec_dnz,vec_onz; 2373 PetscScalar *my_dnz,*my_onz,*array; 2374 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2375 PetscInt index_row,index_col,owner; 2376 PetscInt *local_indices,*global_indices; 2377 /* work */ 2378 PetscInt i,j; 2379 PetscErrorCode ierr; 2380 2381 PetscFunctionBegin; 2382 /* CHECKS */ 2383 /* get info from mat */ 2384 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2385 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2386 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2387 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2388 2389 /* MAT_INITIAL_MATRIX */ 2390 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2391 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2392 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2393 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2394 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2395 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2396 2397 /* 2398 preallocation 2399 */ 2400 2401 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2402 /* 2403 Some vectors are needed to sum up properly on shared interface dofs. 2404 Note that preallocation is not exact, since it overestimates nonzeros 2405 */ 2406 /* 2407 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2408 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2409 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2410 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2411 */ 2412 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2413 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2414 /* All processes need to compute entire row ownership */ 2415 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2416 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2417 for (i=0;i<nsubdomains;i++) { 2418 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2419 row_ownership[j]=i; 2420 } 2421 } 2422 /* map indices of local mat to global values */ 2423 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2424 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2425 for (i=0;i<local_rows;i++) local_indices[i]=i; 2426 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2427 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2428 2429 /* 2430 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2431 then, they will be summed up properly. This way, preallocation is always sufficient 2432 */ 2433 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2434 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2435 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2436 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2437 for (i=0;i<local_rows;i++) { 2438 index_row = global_indices[i]; 2439 for (j=i;j<local_rows;j++) { 2440 owner = row_ownership[index_row]; 2441 index_col = global_indices[j]; 2442 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2443 my_dnz[i] += 1.0; 2444 } else { /* offdiag block */ 2445 my_onz[i] += 1.0; 2446 } 2447 /* same as before, interchanging rows and cols */ 2448 if (i != j) { 2449 owner = row_ownership[index_col]; 2450 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2451 my_dnz[j] += 1.0; 2452 } else { 2453 my_onz[j] += 1.0; 2454 } 2455 } 2456 } 2457 } 2458 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2459 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2460 if (local_rows) { /* multilevel guard */ 2461 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2462 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2463 } 2464 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2465 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2466 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2467 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2468 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2469 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2470 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2471 2472 /* set computed preallocation in dnz and onz */ 2473 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2474 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2475 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2476 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2477 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2478 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2479 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2480 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2481 2482 /* Resize preallocation if overestimated */ 2483 for (i=0;i<lrows;i++) { 2484 dnz[i] = PetscMin(dnz[i],lcols); 2485 onz[i] = PetscMin(onz[i],cols-lcols); 2486 } 2487 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2488 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2489 2490 /* 2491 Set values. Very Basic. 2492 */ 2493 for (i=0;i<local_rows;i++) { 2494 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2495 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2496 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2497 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2498 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2499 } 2500 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2501 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2502 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2503 2504 /* get back mat */ 2505 *M = new_mat; 2506 PetscFunctionReturn(0); 2507 } 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "MatISSubassemble_Private" 2511 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2512 { 2513 Mat subdomain_adj; 2514 IS new_ranks,ranks_send_to; 2515 MatPartitioning partitioner; 2516 Mat_IS *matis; 2517 PetscInt n_neighs,*neighs,*n_shared,**shared; 2518 PetscInt prank; 2519 PetscMPIInt size,rank,color; 2520 PetscInt *xadj,*adjncy,*oldranks; 2521 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2522 PetscInt i,j,n_subdomains,local_size,threshold=0; 2523 PetscErrorCode ierr; 2524 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2525 PetscSubcomm subcomm; 2526 2527 PetscFunctionBegin; 2528 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2529 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2530 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2531 2532 /* Get info on mapping */ 2533 matis = (Mat_IS*)(mat->data); 2534 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2535 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2536 2537 /* build local CSR graph of subdomains' connectivity */ 2538 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2539 xadj[0] = 0; 2540 xadj[1] = PetscMax(n_neighs-1,0); 2541 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2542 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2543 2544 if (threshold) { 2545 PetscInt* count,min_threshold; 2546 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2547 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2548 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2549 for (j=0;j<n_shared[i];j++) { 2550 count[shared[i][j]] += 1; 2551 } 2552 } 2553 /* adapt threshold since we dont want to lose significant connections */ 2554 min_threshold = n_neighs; 2555 for (i=1;i<n_neighs;i++) { 2556 for (j=0;j<n_shared[i];j++) { 2557 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2558 } 2559 } 2560 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2561 threshold = PetscMax(min_threshold+1,threshold); 2562 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2563 xadj[1] = 0; 2564 for (i=1;i<n_neighs;i++) { 2565 for (j=0;j<n_shared[i];j++) { 2566 if (count[shared[i][j]] < threshold) { 2567 adjncy[xadj[1]] = neighs[i]; 2568 adjncy_wgt[xadj[1]] = n_shared[i]; 2569 xadj[1]++; 2570 break; 2571 } 2572 } 2573 } 2574 ierr = PetscFree(count);CHKERRQ(ierr); 2575 } else { 2576 if (xadj[1]) { 2577 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2578 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2579 } 2580 } 2581 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2582 if (use_square) { 2583 for (i=0;i<xadj[1];i++) { 2584 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2585 } 2586 } 2587 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2588 2589 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2590 2591 /* 2592 Restrict work on active processes only. 2593 */ 2594 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2595 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2596 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2597 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2598 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2599 if (color) { 2600 ierr = PetscFree(xadj);CHKERRQ(ierr); 2601 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2602 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2603 } else { 2604 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2605 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2606 prank = rank; 2607 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2608 for (i=0;i<size;i++) { 2609 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2610 } 2611 for (i=0;i<xadj[1];i++) { 2612 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2613 } 2614 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2615 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2616 n_subdomains = (PetscInt)size; 2617 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2618 2619 /* Partition */ 2620 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2621 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2622 if (use_vwgt) { 2623 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2624 v_wgt[0] = local_size; 2625 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2626 } 2627 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2628 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2629 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2630 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2631 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2632 2633 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2634 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2635 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2636 /* clean up */ 2637 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2638 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2639 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2640 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2641 } 2642 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2643 2644 /* assemble parallel IS for sends */ 2645 i = 1; 2646 if (color) i=0; 2647 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2648 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2649 2650 /* get back IS */ 2651 *is_sends = ranks_send_to; 2652 PetscFunctionReturn(0); 2653 } 2654 2655 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2656 2657 #undef __FUNCT__ 2658 #define __FUNCT__ "MatISSubassemble" 2659 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2660 { 2661 Mat local_mat,new_mat; 2662 Mat_IS *matis; 2663 IS is_sends_internal; 2664 PetscInt rows,cols; 2665 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2666 PetscBool ismatis,isdense; 2667 ISLocalToGlobalMapping l2gmap; 2668 PetscInt* l2gmap_indices; 2669 const PetscInt* is_indices; 2670 MatType new_local_type; 2671 MatTypePrivate new_local_type_private; 2672 /* buffers */ 2673 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2674 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2675 /* MPI */ 2676 MPI_Comm comm; 2677 PetscMPIInt n_sends,n_recvs,commsize; 2678 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2679 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2680 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2681 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2682 PetscErrorCode ierr; 2683 2684 PetscFunctionBegin; 2685 /* checks */ 2686 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2687 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2688 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2689 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2690 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2691 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2692 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2693 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2694 PetscValidLogicalCollectiveInt(mat,bs,0); 2695 /* prepare IS for sending if not provided */ 2696 if (!is_sends) { 2697 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2698 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2699 } else { 2700 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2701 is_sends_internal = is_sends; 2702 } 2703 2704 /* get pointer of MATIS data */ 2705 matis = (Mat_IS*)mat->data; 2706 2707 /* get comm */ 2708 comm = PetscObjectComm((PetscObject)mat); 2709 2710 /* compute number of sends */ 2711 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2712 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2713 2714 /* compute number of receives */ 2715 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2716 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2717 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2718 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2719 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2720 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2721 ierr = PetscFree(iflags);CHKERRQ(ierr); 2722 2723 /* prepare send/receive buffers */ 2724 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2725 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2726 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2727 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2728 2729 /* Get data from local mat */ 2730 if (!isdense) { 2731 /* TODO: See below some guidelines on how to prepare the local buffers */ 2732 /* 2733 send_buffer_vals should contain the raw values of the local matrix 2734 send_buffer_idxs should contain: 2735 - MatType_PRIVATE type 2736 - PetscInt size_of_l2gmap 2737 - PetscInt global_row_indices[size_of_l2gmap] 2738 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2739 */ 2740 } else { 2741 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2742 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2743 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2744 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2745 send_buffer_idxs[1] = i; 2746 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2747 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2748 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2749 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2750 for (i=0;i<n_sends;i++) { 2751 ilengths_vals[is_indices[i]] = len*len; 2752 ilengths_idxs[is_indices[i]] = len+2; 2753 } 2754 } 2755 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2756 buf_size_idxs = 0; 2757 buf_size_vals = 0; 2758 for (i=0;i<n_recvs;i++) { 2759 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2760 buf_size_vals += (PetscInt)olengths_vals[i]; 2761 } 2762 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2763 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2764 2765 /* get new tags for clean communications */ 2766 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2767 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2768 2769 /* allocate for requests */ 2770 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2771 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2772 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2773 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2774 2775 /* communications */ 2776 ptr_idxs = recv_buffer_idxs; 2777 ptr_vals = recv_buffer_vals; 2778 for (i=0;i<n_recvs;i++) { 2779 source_dest = onodes[i]; 2780 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2781 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2782 ptr_idxs += olengths_idxs[i]; 2783 ptr_vals += olengths_vals[i]; 2784 } 2785 for (i=0;i<n_sends;i++) { 2786 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2787 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2788 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2789 } 2790 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2791 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2792 2793 /* assemble new l2g map */ 2794 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2795 ptr_idxs = recv_buffer_idxs; 2796 buf_size_idxs = 0; 2797 for (i=0;i<n_recvs;i++) { 2798 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2799 ptr_idxs += olengths_idxs[i]; 2800 } 2801 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2802 ptr_idxs = recv_buffer_idxs; 2803 buf_size_idxs = 0; 2804 for (i=0;i<n_recvs;i++) { 2805 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2806 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2807 ptr_idxs += olengths_idxs[i]; 2808 } 2809 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2810 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2811 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2812 2813 /* infer new local matrix type from received local matrices type */ 2814 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2815 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2816 new_local_type_private = MATAIJ_PRIVATE; 2817 new_local_type = MATSEQAIJ; 2818 if (n_recvs) { 2819 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2820 ptr_idxs = recv_buffer_idxs; 2821 for (i=0;i<n_recvs;i++) { 2822 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2823 new_local_type_private = MATAIJ_PRIVATE; 2824 break; 2825 } 2826 ptr_idxs += olengths_idxs[i]; 2827 } 2828 switch (new_local_type_private) { 2829 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2830 new_local_type = MATSEQAIJ; 2831 bs = 1; 2832 break; 2833 case MATAIJ_PRIVATE: 2834 new_local_type = MATSEQAIJ; 2835 bs = 1; 2836 break; 2837 case MATBAIJ_PRIVATE: 2838 new_local_type = MATSEQBAIJ; 2839 break; 2840 case MATSBAIJ_PRIVATE: 2841 new_local_type = MATSEQSBAIJ; 2842 break; 2843 default: 2844 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2845 break; 2846 } 2847 } 2848 2849 /* create MATIS object */ 2850 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2851 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2852 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2853 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2854 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2855 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2856 2857 /* set values */ 2858 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2859 ptr_vals = recv_buffer_vals; 2860 ptr_idxs = recv_buffer_idxs; 2861 for (i=0;i<n_recvs;i++) { 2862 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2863 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2864 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2865 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2866 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2867 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2868 } 2869 ptr_idxs += olengths_idxs[i]; 2870 ptr_vals += olengths_vals[i]; 2871 } 2872 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2873 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2874 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2875 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2876 2877 { /* check */ 2878 Vec lvec,rvec; 2879 PetscReal infty_error; 2880 2881 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2882 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2883 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2884 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2885 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2886 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2887 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2888 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2889 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2890 } 2891 2892 /* free workspace */ 2893 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2894 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2895 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2896 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2897 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2898 if (isdense) { 2899 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2900 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2901 } else { 2902 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2903 } 2904 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2905 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2906 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2907 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2908 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2909 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2910 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2911 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2912 ierr = PetscFree(onodes);CHKERRQ(ierr); 2913 /* get back new mat */ 2914 *mat_n = new_mat; 2915 PetscFunctionReturn(0); 2916 } 2917 2918 #undef __FUNCT__ 2919 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2920 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2921 { 2922 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2923 PC_IS *pcis = (PC_IS*)pc->data; 2924 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2925 MatNullSpace CoarseNullSpace=NULL; 2926 ISLocalToGlobalMapping coarse_islg; 2927 IS coarse_is; 2928 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2929 PetscInt im_active=-1,active_procs=-1; 2930 PC pc_temp; 2931 PCType coarse_pc_type; 2932 KSPType coarse_ksp_type; 2933 PetscBool multilevel_requested,multilevel_allowed; 2934 PetscBool setsym,issym,isbddc,isnn; 2935 MatStructure matstruct; 2936 PetscErrorCode ierr; 2937 2938 PetscFunctionBegin; 2939 /* Assign global numbering to coarse dofs */ 2940 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2941 2942 /* infer some info from user */ 2943 issym = PETSC_FALSE; 2944 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2945 multilevel_allowed = PETSC_FALSE; 2946 multilevel_requested = PETSC_FALSE; 2947 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2948 if (multilevel_requested) { 2949 /* count "active processes" */ 2950 im_active = !!(pcis->n); 2951 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2952 if (active_procs/pcbddc->coarsening_ratio < 2) { 2953 multilevel_allowed = PETSC_FALSE; 2954 } else { 2955 multilevel_allowed = PETSC_TRUE; 2956 } 2957 } 2958 2959 /* set defaults for coarse KSP and PC */ 2960 if (multilevel_allowed) { 2961 if (issym) { 2962 coarse_ksp_type = KSPRICHARDSON; 2963 } else { 2964 coarse_ksp_type = KSPCHEBYSHEV; 2965 } 2966 coarse_pc_type = PCBDDC; 2967 } else { 2968 coarse_ksp_type = KSPPREONLY; 2969 coarse_pc_type = PCREDUNDANT; 2970 } 2971 2972 /* create the coarse KSP object only once with defaults */ 2973 if (!pcbddc->coarse_ksp) { 2974 char prefix[256],str_level[3]; 2975 size_t len; 2976 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2977 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2978 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2979 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2980 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2981 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2982 /* prefix */ 2983 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 2984 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2985 if (!pcbddc->current_level) { 2986 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2987 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 2988 } else { 2989 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2990 if (pcbddc->current_level>1) len -= 2; 2991 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 2992 *(prefix+len)='\0'; 2993 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 2994 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 2995 } 2996 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 2997 } 2998 /* allow user customization */ 2999 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3000 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3001 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3002 3003 /* get some info after set from options */ 3004 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3005 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3006 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3007 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3008 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3009 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3010 isbddc = PETSC_FALSE; 3011 } 3012 3013 /* propagate BDDC info to the next level */ 3014 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3015 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3016 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3017 3018 /* creates temporary MATIS object for coarse matrix */ 3019 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3020 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3021 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3022 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3023 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3024 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3025 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3026 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3027 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3028 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3029 3030 /* assemble coarse matrix */ 3031 if (isbddc || isnn) { 3032 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3033 } else { 3034 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3035 } 3036 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3037 3038 /* create local to global scatters for coarse problem */ 3039 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3040 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3041 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3042 3043 /* propagate symmetry info to coarse matrix */ 3044 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3045 3046 /* Compute coarse null space (special handling by BDDC only) */ 3047 if (pcbddc->NullSpace) { 3048 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3049 if (isbddc) { 3050 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3051 } else { 3052 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3053 } 3054 } 3055 3056 /* set operators */ 3057 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3058 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3059 3060 /* additional KSP customization */ 3061 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3062 if (max_it < 5) { 3063 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3064 } 3065 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3066 3067 3068 /* print some info if requested */ 3069 if (pcbddc->dbg_flag) { 3070 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3071 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3072 if (!multilevel_allowed) { 3073 if (multilevel_requested) { 3074 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3075 } else if (pcbddc->max_levels) { 3076 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3077 } 3078 } 3079 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3080 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3081 } 3082 3083 /* setup coarse ksp */ 3084 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3085 if (pcbddc->dbg_flag) { 3086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3087 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3088 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3089 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3090 } 3091 3092 /* Check coarse problem if requested */ 3093 if (pcbddc->dbg_flag) { 3094 KSP check_ksp; 3095 KSPType check_ksp_type; 3096 PC check_pc; 3097 Vec check_vec; 3098 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3099 PetscInt its; 3100 PetscBool ispreonly,compute; 3101 3102 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3103 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3104 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3105 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3106 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3107 if (ispreonly) { 3108 check_ksp_type = KSPPREONLY; 3109 compute = PETSC_FALSE; 3110 } else { 3111 if (issym) check_ksp_type = KSPCG; 3112 else check_ksp_type = KSPGMRES; 3113 compute = PETSC_TRUE; 3114 } 3115 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3116 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3117 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3118 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3119 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3120 /* create random vec */ 3121 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3122 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3123 if (CoarseNullSpace) { 3124 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3125 } 3126 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3127 /* solve coarse problem */ 3128 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3129 if (CoarseNullSpace) { 3130 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3131 } 3132 /* check coarse problem residual error */ 3133 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3134 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3135 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3136 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3137 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3138 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3139 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3140 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3141 /* get eigenvalue estimation if preonly has not been requested */ 3142 if (!ispreonly) { 3143 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3144 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3146 } 3147 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3148 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3149 } 3150 /* free memory */ 3151 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3152 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3153 PetscFunctionReturn(0); 3154 } 3155 3156 #undef __FUNCT__ 3157 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3158 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3159 { 3160 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3161 PC_IS* pcis = (PC_IS*)pc->data; 3162 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3163 PetscInt i,j,coarse_size; 3164 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3165 PetscErrorCode ierr; 3166 3167 PetscFunctionBegin; 3168 /* get indices in local ordering for vertices and constraints */ 3169 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3170 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3171 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3172 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3173 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3174 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3175 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3176 3177 /* Compute global number of coarse dofs */ 3178 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3179 3180 /* check numbering */ 3181 if (pcbddc->dbg_flag) { 3182 PetscScalar coarsesum,*array; 3183 3184 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3185 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3186 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3187 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3188 for (i=0;i<pcbddc->local_primal_size;i++) { 3189 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3190 } 3191 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3192 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3193 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3194 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3195 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3196 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3197 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3198 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3199 for (i=0;i<pcis->n;i++) { 3200 if (array[i] == 1.0) { 3201 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3202 } 3203 } 3204 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3205 for (i=0;i<pcis->n;i++) { 3206 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3207 } 3208 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3209 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3210 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3211 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3212 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3213 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3214 if (pcbddc->dbg_flag > 1) { 3215 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3216 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3217 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3218 for (i=0;i<pcbddc->local_primal_size;i++) { 3219 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]); 3220 } 3221 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3222 } 3223 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3224 } 3225 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3226 /* get back data */ 3227 *coarse_size_n = coarse_size; 3228 *local_primal_indices_n = local_primal_indices; 3229 PetscFunctionReturn(0); 3230 } 3231 3232