1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 pcbddc->n_ISForDofs = 0; 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "PCBDDCCreateWorkVectors" 111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 112 { 113 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 114 PC_IS *pcis = (PC_IS*)pc->data; 115 VecType impVecType; 116 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 117 PetscErrorCode ierr; 118 119 PetscFunctionBegin; 120 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 121 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 122 local_primal_size = n_constraints+n_vertices; 123 n_R = pcis->n-n_vertices; 124 /* local work vectors */ 125 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 126 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 127 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 128 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 129 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 130 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 131 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 132 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 133 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 134 if (n_constraints) { 135 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 136 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 137 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetUpCorrection" 144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 145 { 146 PetscErrorCode ierr; 147 /* pointers to pcis and pcbddc */ 148 PC_IS* pcis = (PC_IS*)pc->data; 149 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 150 /* submatrices of local problem */ 151 Mat A_RV,A_VR,A_VV; 152 /* working matrices */ 153 Mat M1,M2,M3,C_CR; 154 /* working vectors */ 155 Vec vec1_C,vec2_C,vec1_V,vec2_V; 156 /* additional working stuff */ 157 IS is_aux; 158 ISLocalToGlobalMapping BtoNmap; 159 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 160 const PetscScalar *array,*row_cmat_values; 161 const PetscInt *row_cmat_indices,*idx_R_local; 162 PetscInt *vertices,*idx_V_B,*auxindices; 163 PetscInt n_vertices,n_constraints,size_of_constraint; 164 PetscInt i,j,n_R,n_D,n_B; 165 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 166 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 184 /* Allocating some extra storage just to be safe */ 185 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 186 for (i=0;i<pcis->n;i++) auxindices[i]=i; 187 188 /* vertices in boundary numbering */ 189 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 190 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 191 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 193 if (i != n_vertices) { 194 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 195 } 196 197 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 198 if (n_constraints) { 199 /* work vectors */ 200 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 201 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 202 /* auxiliary matrices */ 203 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 204 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 205 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 206 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 207 208 /* Extract constraints on R nodes: C_{CR} */ 209 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 210 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 211 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 212 213 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 214 for (i=0;i<n_constraints;i++) { 215 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 216 /* Get row of constraint matrix in R numbering */ 217 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 218 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 219 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 220 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 221 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 222 /* Solve for row of constraint matrix in R numbering */ 223 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 224 /* Set values in local_auxmat2 */ 225 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 226 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 227 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 228 } 229 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 230 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 231 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 232 233 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 234 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 235 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 236 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 237 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 238 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 239 ierr = MatSetUp(M1);CHKERRQ(ierr); 240 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 241 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 242 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 243 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 244 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 245 ierr = MatDestroy(&M2);CHKERRQ(ierr); 246 ierr = MatDestroy(&M3);CHKERRQ(ierr); 247 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 248 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 249 } 250 251 /* Get submatrices from subdomain matrix */ 252 if (n_vertices) { 253 PetscInt ibs,mbs; 254 PetscBool issbaij; 255 Mat newmat; 256 257 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 258 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 259 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 260 if (ibs != mbs) { /* need to convert to SEQAIJ */ 261 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 262 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 263 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 265 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 266 } else { 267 /* this is safe */ 268 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 269 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 270 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 271 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 272 /* which of the two approaches is faster? */ 273 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 274 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 275 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 276 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 277 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 278 } else { 279 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 280 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 281 } 282 } 283 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 284 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 285 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 286 } 287 288 /* Matrix of coarse basis functions (local) */ 289 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 290 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 291 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 292 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 293 if (pcbddc->switch_static || pcbddc->dbg_flag) { 294 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 295 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 296 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 297 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 298 } 299 300 if (pcbddc->dbg_flag) { 301 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 302 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 303 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 304 } 305 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 306 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 307 308 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 309 310 /* vertices */ 311 for (i=0;i<n_vertices;i++) { 312 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 313 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 314 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 315 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 316 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 317 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 318 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 319 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 320 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 321 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 322 if (n_constraints) { 323 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 324 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 325 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 326 } 327 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 328 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 329 330 /* Set values in coarse basis function and subdomain part of coarse_mat */ 331 /* coarse basis functions */ 332 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 333 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 334 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 335 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 336 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 337 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 338 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 339 if (pcbddc->switch_static || pcbddc->dbg_flag) { 340 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 341 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 342 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 343 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 344 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 345 } 346 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 347 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 348 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 if (n_constraints) { 351 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 352 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 } 355 356 /* check */ 357 if (pcbddc->dbg_flag) { 358 /* assemble subdomain vector on local nodes */ 359 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 360 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 361 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 362 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 363 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 364 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 365 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 366 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 367 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 368 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 369 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 370 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 371 if (n_constraints) { 372 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 373 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 374 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 375 } 376 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 378 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 379 /* check saddle point solution */ 380 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 381 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 382 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 383 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 384 /* shift by the identity matrix */ 385 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 386 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 387 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 388 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 389 } 390 } 391 392 /* constraints */ 393 for (i=0;i<n_constraints;i++) { 394 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 395 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 396 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 397 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 398 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 399 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 400 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 401 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 402 if (n_vertices) { 403 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 404 } 405 /* Set values in coarse basis function and subdomain part of coarse_mat */ 406 /* coarse basis functions */ 407 j = i+n_vertices; /* don't touch this! */ 408 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 409 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 410 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 411 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 412 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 414 if (pcbddc->switch_static || pcbddc->dbg_flag) { 415 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 416 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 417 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 418 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 420 } 421 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 422 if (n_vertices) { 423 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 424 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 426 } 427 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 428 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 430 431 if (pcbddc->dbg_flag) { 432 /* assemble subdomain vector on nodes */ 433 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 434 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 437 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 439 /* assemble subdomain vector of lagrange multipliers */ 440 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 441 if (n_vertices) { 442 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 443 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 444 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 445 } 446 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 447 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 448 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 449 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 452 /* check saddle point solution */ 453 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 454 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 456 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 457 /* shift by the identity matrix */ 458 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 459 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 460 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 461 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 462 } 463 } 464 /* call assembling routines for local coarse basis */ 465 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 if (pcbddc->switch_static || pcbddc->dbg_flag) { 468 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 469 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 470 } 471 472 /* compute other basis functions for non-symmetric problems */ 473 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 474 if (!setsym || (setsym && !issym)) { 475 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 476 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 477 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 478 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 481 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 482 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 483 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 484 } 485 for (i=0;i<pcbddc->local_primal_size;i++) { 486 if (n_constraints) { 487 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 488 for (j=0;j<n_constraints;j++) { 489 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 490 } 491 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 492 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 493 } 494 if (i<n_vertices) { 495 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 496 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 499 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 500 if (n_constraints) { 501 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 502 } 503 } else { 504 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 505 } 506 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 507 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 508 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 509 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 510 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 511 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 512 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 513 if (i<n_vertices) { 514 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 515 } 516 if (pcbddc->switch_static || pcbddc->dbg_flag) { 517 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 518 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 519 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 520 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 521 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 522 } 523 524 if (pcbddc->dbg_flag) { 525 /* assemble subdomain vector on nodes */ 526 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 527 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 528 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 529 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 530 if (i<n_vertices) { 531 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 532 } 533 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 534 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 535 /* assemble subdomain vector of lagrange multipliers */ 536 for (j=0;j<pcbddc->local_primal_size;j++) { 537 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 538 } 539 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 540 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 541 /* check saddle point solution */ 542 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 543 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 544 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 545 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 546 /* shift by the identity matrix */ 547 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 548 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 549 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 550 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 551 } 552 } 553 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 if (pcbddc->switch_static || pcbddc->dbg_flag) { 556 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 } 559 } 560 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 561 /* Checking coarse_sub_mat and coarse basis functios */ 562 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 563 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 564 if (pcbddc->dbg_flag) { 565 Mat coarse_sub_mat; 566 Mat AUXMAT,TM1,TM2,TM3,TM4; 567 Mat coarse_phi_D,coarse_phi_B; 568 Mat coarse_psi_D,coarse_psi_B; 569 Mat A_II,A_BB,A_IB,A_BI; 570 MatType checkmattype=MATSEQAIJ; 571 PetscReal real_value; 572 573 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 574 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 575 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 576 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 577 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 578 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 579 if (pcbddc->coarse_psi_B) { 580 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 581 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 582 } 583 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 584 585 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 586 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 587 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 588 if (pcbddc->coarse_psi_B) { 589 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 590 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 591 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 592 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 593 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 594 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 595 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 596 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 597 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 598 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 599 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 600 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 601 } else { 602 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 603 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 604 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 605 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 607 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 608 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 609 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 610 } 611 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 612 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 613 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 614 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 615 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 616 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 621 for (i=0;i<pcbddc->local_primal_size;i++) { 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 623 } 624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 625 for (i=0;i<pcbddc->local_primal_size;i++) { 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 627 } 628 if (pcbddc->coarse_psi_B) { 629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 630 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 632 } 633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 634 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 636 } 637 } 638 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 639 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 640 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 641 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 642 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 643 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 644 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 645 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 646 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 647 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 648 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 649 if (pcbddc->coarse_psi_B) { 650 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 651 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 652 } 653 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 654 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 655 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 656 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 657 } 658 /* free memory */ 659 if (n_vertices) { 660 ierr = PetscFree(vertices);CHKERRQ(ierr); 661 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 662 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 663 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 664 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 665 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 666 } 667 if (n_constraints) { 668 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 669 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 670 ierr = MatDestroy(&M1);CHKERRQ(ierr); 671 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 672 } 673 ierr = PetscFree(auxindices);CHKERRQ(ierr); 674 /* get back data */ 675 *coarse_submat_vals_n = coarse_submat_vals; 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 681 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 682 { 683 PC_IS* pcis = (PC_IS*)(pc->data); 684 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 685 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 686 PetscBool issbaij,isbaij; 687 /* manage repeated solves */ 688 MatReuse reuse; 689 MatStructure matstruct; 690 PetscErrorCode ierr; 691 692 PetscFunctionBegin; 693 /* get mat flags */ 694 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 695 reuse = MAT_INITIAL_MATRIX; 696 if (pc->setupcalled) { 697 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 698 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 699 if (matstruct == SAME_NONZERO_PATTERN) { 700 reuse = MAT_REUSE_MATRIX; 701 } else { 702 reuse = MAT_INITIAL_MATRIX; 703 } 704 } 705 if (reuse == MAT_INITIAL_MATRIX) { 706 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 707 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 708 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 709 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 710 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 711 } 712 713 /* transform local matrices if needed */ 714 if (pcbddc->use_change_of_basis) { 715 Mat change_mat_all; 716 PetscScalar *row_cmat_values; 717 PetscInt *row_cmat_indices; 718 PetscInt *nnz,*is_indices,*temp_indices; 719 PetscInt i,j,k,n_D,n_B; 720 721 /* Get Non-overlapping dimensions */ 722 n_B = pcis->n_B; 723 n_D = pcis->n-n_B; 724 725 /* compute nonzero structure of change of basis on all local nodes */ 726 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 727 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 728 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 729 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 730 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 731 k=1; 732 for (i=0;i<n_B;i++) { 733 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 734 nnz[is_indices[i]]=j; 735 if (k < j) k = j; 736 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 737 } 738 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 739 /* assemble change of basis matrix on the whole set of local dofs */ 740 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 741 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 742 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 743 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 744 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 745 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 746 for (i=0;i<n_D;i++) { 747 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 748 } 749 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 750 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 751 for (i=0;i<n_B;i++) { 752 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 753 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 754 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 755 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 756 } 757 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 758 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 759 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 760 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 761 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 762 if (!issbaij && !isbaij) { 763 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 764 } else { 765 Mat work_mat; 766 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 767 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 768 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 769 } 770 /* 771 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 772 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 773 */ 774 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 775 ierr = PetscFree(nnz);CHKERRQ(ierr); 776 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 777 } else { 778 /* without change of basis, the local matrix is unchanged */ 779 if (!pcbddc->local_mat) { 780 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 781 pcbddc->local_mat = matis->A; 782 } 783 } 784 785 /* get submatrices */ 786 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 787 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 788 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 789 if (!issbaij) { 790 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 791 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 792 } else { 793 Mat newmat; 794 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 795 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 796 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 797 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 798 } 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 804 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 805 { 806 PC_IS* pcis = (PC_IS*)(pc->data); 807 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 808 IS is_aux1,is_aux2; 809 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 810 PetscInt n_vertices,i,j,n_R,n_D,n_B; 811 PetscInt vbs,bs; 812 PetscBT bitmask; 813 PetscErrorCode ierr; 814 815 PetscFunctionBegin; 816 /* Set Non-overlapping dimensions */ 817 n_B = pcis->n_B; n_D = pcis->n - n_B; 818 /* get vertex indices from constraint matrix */ 819 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 820 821 /* create auxiliary bitmask */ 822 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 823 for (i=0;i<n_vertices;i++) { 824 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 825 } 826 827 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 828 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 829 for (i=0, n_R=0; i<pcis->n; i++) { 830 if (!PetscBTLookup(bitmask,i)) { 831 idx_R_local[n_R] = i; 832 n_R++; 833 } 834 } 835 836 /* Block code */ 837 vbs = 1; 838 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 839 if (bs>1 && !(n_vertices%bs)) { 840 PetscBool is_blocked = PETSC_TRUE; 841 PetscInt *vary; 842 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 843 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 844 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 845 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 846 for (i=0; i<n_vertices; i++) { 847 if (vary[i]!=0 && vary[i]!=bs) { 848 is_blocked = PETSC_FALSE; 849 break; 850 } 851 } 852 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 853 vbs = bs; 854 for (i=0;i<n_R/vbs;i++) { 855 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 856 } 857 } 858 ierr = PetscFree(vary);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(vertices);CHKERRQ(ierr); 861 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 862 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 863 864 /* print some info if requested */ 865 if (pcbddc->dbg_flag) { 866 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 867 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 868 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 869 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 870 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 872 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 873 } 874 875 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 876 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 877 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 878 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 879 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 880 for (i=0; i<n_D; i++) { 881 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 882 } 883 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 884 for (i=0, j=0; i<n_R; i++) { 885 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 886 aux_array1[j++] = i; 887 } 888 } 889 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 890 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 891 for (i=0, j=0; i<n_B; i++) { 892 if (!PetscBTLookup(bitmask,is_indices[i])) { 893 aux_array2[j++] = i; 894 } 895 } 896 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 897 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 898 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 899 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 900 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 901 902 if (pcbddc->switch_static || pcbddc->dbg_flag) { 903 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 904 for (i=0, j=0; i<n_R; i++) { 905 if (PetscBTLookup(bitmask,idx_R_local[i])) { 906 aux_array1[j++] = i; 907 } 908 } 909 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 910 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 911 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 912 } 913 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 914 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 915 PetscFunctionReturn(0); 916 } 917 918 919 #undef __FUNCT__ 920 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 921 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 922 { 923 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 924 PC_IS *pcis = (PC_IS*)pc->data; 925 PC pc_temp; 926 Mat A_RR; 927 Vec vec1,vec2,vec3; 928 MatStructure matstruct; 929 PetscScalar m_one = -1.0; 930 PetscReal value; 931 PetscInt n_D,n_R,ibs,mbs; 932 PetscBool use_exact,use_exact_reduced,issbaij; 933 PetscErrorCode ierr; 934 /* prefixes stuff */ 935 char dir_prefix[256],neu_prefix[256],str_level[3]; 936 size_t len; 937 938 PetscFunctionBegin; 939 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 940 941 /* compute prefixes */ 942 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 943 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 944 if (!pcbddc->current_level) { 945 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 946 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 947 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 948 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 949 } else { 950 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 951 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 952 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 953 len -= 15; /* remove "pc_bddc_coarse_" */ 954 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 955 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 956 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 957 *(dir_prefix+len)='\0'; 958 *(neu_prefix+len)='\0'; 959 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 960 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 961 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 962 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 963 } 964 965 /* DIRICHLET PROBLEM */ 966 /* Matrix for Dirichlet problem is pcis->A_II */ 967 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 968 if (!pcbddc->ksp_D) { /* create object if not yet build */ 969 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 970 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 971 /* default */ 972 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 973 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 974 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 975 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 976 if (issbaij) { 977 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 978 } else { 979 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 980 } 981 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 982 } 983 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 984 /* Allow user's customization */ 985 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 986 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 987 if (!n_D) { 988 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 989 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 990 } 991 /* Set Up KSP for Dirichlet problem of BDDC */ 992 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 993 /* set ksp_D into pcis data */ 994 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 995 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 996 pcis->ksp_D = pcbddc->ksp_D; 997 998 /* NEUMANN PROBLEM */ 999 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1000 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1001 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1002 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1003 if (ibs != mbs) { 1004 Mat newmat; 1005 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1006 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1007 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1008 } else { 1009 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1010 } 1011 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1012 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1013 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1014 /* default */ 1015 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1016 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1017 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1018 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1019 if (issbaij) { 1020 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1021 } else { 1022 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1023 } 1024 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1025 } 1026 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1027 /* Allow user's customization */ 1028 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1029 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1030 if (!n_R) { 1031 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1032 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1033 } 1034 /* Set Up KSP for Neumann problem of BDDC */ 1035 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1036 1037 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1038 1039 /* Dirichlet */ 1040 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1041 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1042 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1043 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1044 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1045 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1046 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1047 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1048 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1049 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1050 /* need to be adapted? */ 1051 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1052 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1053 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1054 /* print info */ 1055 if (pcbddc->dbg_flag) { 1056 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1057 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1058 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1059 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1060 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1061 } 1062 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1063 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1064 } 1065 1066 /* Neumann */ 1067 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1068 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1069 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1070 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1071 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1072 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1073 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1074 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1075 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1076 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1077 /* need to be adapted? */ 1078 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1079 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1080 /* print info */ 1081 if (pcbddc->dbg_flag) { 1082 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1083 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1084 } 1085 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1086 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1087 } 1088 /* free Neumann problem's matrix */ 1089 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1095 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1096 { 1097 PetscErrorCode ierr; 1098 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1099 1100 PetscFunctionBegin; 1101 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1102 if (pcbddc->local_auxmat1) { 1103 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1104 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1111 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1112 { 1113 PetscErrorCode ierr; 1114 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1115 PC_IS* pcis = (PC_IS*) (pc->data); 1116 const PetscScalar zero = 0.0; 1117 1118 PetscFunctionBegin; 1119 /* Application of PHI^T (or PSI^T) */ 1120 if (pcbddc->coarse_psi_B) { 1121 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1122 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1123 } else { 1124 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1125 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1126 } 1127 /* Scatter data of coarse_rhs */ 1128 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1129 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1130 1131 /* Local solution on R nodes */ 1132 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1133 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1134 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1135 if (pcbddc->switch_static) { 1136 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1137 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 } 1139 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1140 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1141 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1143 if (pcbddc->switch_static) { 1144 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1145 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1146 } 1147 1148 /* Coarse solution */ 1149 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1150 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1151 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1152 } 1153 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1154 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 1156 /* Sum contributions from two levels */ 1157 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1158 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1164 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1165 { 1166 PetscErrorCode ierr; 1167 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1168 1169 PetscFunctionBegin; 1170 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1176 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1177 { 1178 PetscErrorCode ierr; 1179 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1180 1181 PetscFunctionBegin; 1182 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1183 PetscFunctionReturn(0); 1184 } 1185 1186 /* uncomment for testing purposes */ 1187 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1190 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1191 { 1192 PetscErrorCode ierr; 1193 PC_IS* pcis = (PC_IS*)(pc->data); 1194 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1195 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1196 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1197 MatType impMatType=MATSEQAIJ; 1198 /* one and zero */ 1199 PetscScalar one=1.0,zero=0.0; 1200 /* space to store constraints and their local indices */ 1201 PetscScalar *temp_quadrature_constraint; 1202 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1203 /* iterators */ 1204 PetscInt i,j,k,total_counts,temp_start_ptr; 1205 /* stuff to store connected components stored in pcbddc->mat_graph */ 1206 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1207 PetscInt n_ISForFaces,n_ISForEdges; 1208 /* near null space stuff */ 1209 MatNullSpace nearnullsp; 1210 const Vec *nearnullvecs; 1211 Vec *localnearnullsp; 1212 PetscBool nnsp_has_cnst; 1213 PetscInt nnsp_size; 1214 PetscScalar *array; 1215 /* BLAS integers */ 1216 PetscBLASInt lwork,lierr; 1217 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1218 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1219 /* LAPACK working arrays for SVD or POD */ 1220 PetscBool skip_lapack; 1221 PetscScalar *work; 1222 PetscReal *singular_vals; 1223 #if defined(PETSC_USE_COMPLEX) 1224 PetscReal *rwork; 1225 #endif 1226 #if defined(PETSC_MISSING_LAPACK_GESVD) 1227 PetscBLASInt Blas_one_2=1; 1228 PetscScalar *temp_basis,*correlation_mat; 1229 #else 1230 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1231 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1232 #endif 1233 /* change of basis */ 1234 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1235 PetscBool boolforchange,qr_needed; 1236 PetscBT touched,change_basis,qr_needed_idx; 1237 /* auxiliary stuff */ 1238 PetscInt *nnz,*is_indices,*local_to_B; 1239 /* some quantities */ 1240 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1241 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1242 1243 1244 PetscFunctionBegin; 1245 /* Destroy Mat objects computed previously */ 1246 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1247 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1248 /* Get index sets for faces, edges and vertices from graph */ 1249 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1250 pcbddc->use_vertices = PETSC_TRUE; 1251 } 1252 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1253 /* HACK: provide functions to set change of basis */ 1254 if (!ISForVertices && pcbddc->NullSpace) { 1255 pcbddc->use_change_of_basis = PETSC_TRUE; 1256 pcbddc->use_change_on_faces = PETSC_FALSE; 1257 } 1258 /* print some info */ 1259 if (pcbddc->dbg_flag) { 1260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1261 i = 0; 1262 if (ISForVertices) { 1263 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1264 } 1265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1267 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1268 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1269 } 1270 /* check if near null space is attached to global mat */ 1271 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1272 if (nearnullsp) { 1273 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1274 } else { /* if near null space is not provided BDDC uses constants by default */ 1275 nnsp_size = 0; 1276 nnsp_has_cnst = PETSC_TRUE; 1277 } 1278 /* get max number of constraints on a single cc */ 1279 max_constraints = nnsp_size; 1280 if (nnsp_has_cnst) max_constraints++; 1281 1282 /* 1283 Evaluate maximum storage size needed by the procedure 1284 - temp_indices will contain start index of each constraint stored as follows 1285 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1286 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1287 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1288 */ 1289 total_counts = n_ISForFaces+n_ISForEdges; 1290 total_counts *= max_constraints; 1291 n_vertices = 0; 1292 if (ISForVertices) { 1293 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1294 } 1295 total_counts += n_vertices; 1296 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1297 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1298 total_counts = 0; 1299 max_size_of_constraint = 0; 1300 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1301 if (i<n_ISForEdges) { 1302 used_IS = &ISForEdges[i]; 1303 } else { 1304 used_IS = &ISForFaces[i-n_ISForEdges]; 1305 } 1306 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1307 total_counts += j; 1308 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1309 } 1310 total_counts *= max_constraints; 1311 total_counts += n_vertices; 1312 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1313 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1314 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1315 /* local to boundary numbering */ 1316 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1317 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1318 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1319 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1320 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1321 /* get local part of global near null space vectors */ 1322 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1323 for (k=0;k<nnsp_size;k++) { 1324 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1325 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1326 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1327 } 1328 1329 /* whether or not to skip lapack calls */ 1330 skip_lapack = PETSC_TRUE; 1331 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1332 1333 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1334 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1335 PetscScalar temp_work; 1336 #if defined(PETSC_MISSING_LAPACK_GESVD) 1337 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1338 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1339 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1340 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1341 #if defined(PETSC_USE_COMPLEX) 1342 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1343 #endif 1344 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1345 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1346 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1347 lwork = -1; 1348 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1349 #if !defined(PETSC_USE_COMPLEX) 1350 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1351 #else 1352 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1353 #endif 1354 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1355 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1356 #else /* on missing GESVD */ 1357 /* SVD */ 1358 PetscInt max_n,min_n; 1359 max_n = max_size_of_constraint; 1360 min_n = max_constraints; 1361 if (max_size_of_constraint < max_constraints) { 1362 min_n = max_size_of_constraint; 1363 max_n = max_constraints; 1364 } 1365 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1366 #if defined(PETSC_USE_COMPLEX) 1367 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1368 #endif 1369 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1370 lwork = -1; 1371 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1372 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1373 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1374 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1375 #if !defined(PETSC_USE_COMPLEX) 1376 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1377 #else 1378 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1379 #endif 1380 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1381 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1382 #endif /* on missing GESVD */ 1383 /* Allocate optimal workspace */ 1384 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1385 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1386 } 1387 /* Now we can loop on constraining sets */ 1388 total_counts = 0; 1389 temp_indices[0] = 0; 1390 /* vertices */ 1391 if (ISForVertices) { 1392 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1393 if (nnsp_has_cnst) { /* consider all vertices */ 1394 for (i=0;i<n_vertices;i++) { 1395 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1396 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1397 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1398 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1399 total_counts++; 1400 } 1401 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1402 PetscBool used_vertex; 1403 for (i=0;i<n_vertices;i++) { 1404 used_vertex = PETSC_FALSE; 1405 k = 0; 1406 while (!used_vertex && k<nnsp_size) { 1407 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1408 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1409 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1410 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1411 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1412 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1413 total_counts++; 1414 used_vertex = PETSC_TRUE; 1415 } 1416 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1417 k++; 1418 } 1419 } 1420 } 1421 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1422 n_vertices = total_counts; 1423 } 1424 1425 /* edges and faces */ 1426 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1427 if (i<n_ISForEdges) { 1428 used_IS = &ISForEdges[i]; 1429 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1430 } else { 1431 used_IS = &ISForFaces[i-n_ISForEdges]; 1432 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1433 } 1434 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1435 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1436 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1437 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1438 /* change of basis should not be performed on local periodic nodes */ 1439 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1440 if (nnsp_has_cnst) { 1441 PetscScalar quad_value; 1442 temp_constraints++; 1443 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1444 for (j=0;j<size_of_constraint;j++) { 1445 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1446 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1447 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1448 } 1449 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1450 total_counts++; 1451 } 1452 for (k=0;k<nnsp_size;k++) { 1453 PetscReal real_value; 1454 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1455 for (j=0;j<size_of_constraint;j++) { 1456 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1457 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1458 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1459 } 1460 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1461 /* check if array is null on the connected component */ 1462 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1463 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1464 if (real_value > 0.0) { /* keep indices and values */ 1465 temp_constraints++; 1466 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1467 total_counts++; 1468 } 1469 } 1470 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1471 valid_constraints = temp_constraints; 1472 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1473 if (!pcbddc->use_nnsp_true && temp_constraints) { 1474 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1475 1476 #if defined(PETSC_MISSING_LAPACK_GESVD) 1477 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1478 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1479 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1480 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1481 from that computed using LAPACKgesvd 1482 -> This is due to a different computation of eigenvectors in LAPACKheev 1483 -> The quality of the POD-computed basis will be the same */ 1484 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1485 /* Store upper triangular part of correlation matrix */ 1486 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1487 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1488 for (j=0;j<temp_constraints;j++) { 1489 for (k=0;k<j+1;k++) { 1490 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1491 } 1492 } 1493 /* compute eigenvalues and eigenvectors of correlation matrix */ 1494 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1495 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1496 #if !defined(PETSC_USE_COMPLEX) 1497 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1498 #else 1499 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1500 #endif 1501 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1502 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1503 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1504 j=0; 1505 while (j < temp_constraints && singular_vals[j] < tol) j++; 1506 total_counts=total_counts-j; 1507 valid_constraints = temp_constraints-j; 1508 /* scale and copy POD basis into used quadrature memory */ 1509 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1510 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1511 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1512 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1513 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1515 if (j<temp_constraints) { 1516 PetscInt ii; 1517 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1518 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1519 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1520 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1521 for (k=0;k<temp_constraints-j;k++) { 1522 for (ii=0;ii<size_of_constraint;ii++) { 1523 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1524 } 1525 } 1526 } 1527 #else /* on missing GESVD */ 1528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1529 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1531 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1532 #if !defined(PETSC_USE_COMPLEX) 1533 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1534 #else 1535 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1536 #endif 1537 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1538 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1539 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1540 k = temp_constraints; 1541 if (k > size_of_constraint) k = size_of_constraint; 1542 j = 0; 1543 while (j < k && singular_vals[k-j-1] < tol) j++; 1544 total_counts = total_counts-temp_constraints+k-j; 1545 valid_constraints = k-j; 1546 #endif /* on missing GESVD */ 1547 } 1548 /* setting change_of_basis flag is safe now */ 1549 if (boolforchange) { 1550 for (j=0;j<valid_constraints;j++) { 1551 PetscBTSet(change_basis,total_counts-j-1); 1552 } 1553 } 1554 } 1555 /* free index sets of faces, edges and vertices */ 1556 for (i=0;i<n_ISForFaces;i++) { 1557 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1558 } 1559 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1560 for (i=0;i<n_ISForEdges;i++) { 1561 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1562 } 1563 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1564 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1565 1566 /* free workspace */ 1567 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1568 ierr = PetscFree(work);CHKERRQ(ierr); 1569 #if defined(PETSC_USE_COMPLEX) 1570 ierr = PetscFree(rwork);CHKERRQ(ierr); 1571 #endif 1572 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1573 #if defined(PETSC_MISSING_LAPACK_GESVD) 1574 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1575 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1576 #endif 1577 } 1578 for (k=0;k<nnsp_size;k++) { 1579 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1580 } 1581 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1582 1583 /* set quantities in pcbddc data structure */ 1584 /* n_vertices defines the number of subdomain corners in the primal space */ 1585 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1586 pcbddc->local_primal_size = total_counts; 1587 pcbddc->n_vertices = n_vertices; 1588 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1589 1590 /* Create constraint matrix */ 1591 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1592 /* so we need to set it up properly either with or without change of basis */ 1593 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1594 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1595 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1596 /* array to compute a local numbering of constraints : vertices first then constraints */ 1597 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1598 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1599 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1600 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1601 /* auxiliary stuff for basis change */ 1602 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1603 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1604 1605 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1606 total_primal_vertices=0; 1607 for (i=0;i<pcbddc->local_primal_size;i++) { 1608 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1609 if (size_of_constraint == 1) { 1610 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1611 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1612 aux_primal_minloc[total_primal_vertices]=0; 1613 total_primal_vertices++; 1614 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1615 PetscInt min_loc,min_index; 1616 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1617 /* find first untouched local node */ 1618 k = 0; 1619 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1620 min_index = global_indices[k]; 1621 min_loc = k; 1622 /* search the minimum among global nodes already untouched on the cc */ 1623 for (k=1;k<size_of_constraint;k++) { 1624 /* there can be more than one constraint on a single connected component */ 1625 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1626 min_index = global_indices[k]; 1627 min_loc = k; 1628 } 1629 } 1630 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1631 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1632 aux_primal_minloc[total_primal_vertices]=min_loc; 1633 total_primal_vertices++; 1634 } 1635 } 1636 /* determine if a QR strategy is needed for change of basis */ 1637 qr_needed = PETSC_FALSE; 1638 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1639 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1640 if (PetscBTLookup(change_basis,i)) { 1641 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1642 j = 0; 1643 for (k=0;k<size_of_constraint;k++) { 1644 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1645 j++; 1646 } 1647 } 1648 /* found more than one primal dof on the cc */ 1649 if (j > 1) { 1650 PetscBTSet(qr_needed_idx,i); 1651 qr_needed = PETSC_TRUE; 1652 } 1653 } 1654 } 1655 /* free workspace */ 1656 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1657 1658 /* permute indices in order to have a sorted set of vertices */ 1659 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1660 1661 /* nonzero structure of constraint matrix */ 1662 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1663 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1664 j=total_primal_vertices; 1665 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1666 if (!PetscBTLookup(change_basis,i)) { 1667 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1668 j++; 1669 } 1670 } 1671 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1672 ierr = PetscFree(nnz);CHKERRQ(ierr); 1673 /* set values in constraint matrix */ 1674 for (i=0;i<total_primal_vertices;i++) { 1675 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1676 } 1677 total_counts = total_primal_vertices; 1678 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1679 if (!PetscBTLookup(change_basis,i)) { 1680 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1681 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1682 total_counts++; 1683 } 1684 } 1685 /* assembling */ 1686 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 /* 1689 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1690 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1691 */ 1692 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1693 if (pcbddc->use_change_of_basis) { 1694 /* dual and primal dofs on a single cc */ 1695 PetscInt dual_dofs,primal_dofs; 1696 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1697 PetscInt primal_counter; 1698 /* working stuff for GEQRF */ 1699 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1700 PetscBLASInt lqr_work; 1701 /* working stuff for UNGQR */ 1702 PetscScalar *gqr_work,lgqr_work_t; 1703 PetscBLASInt lgqr_work; 1704 /* working stuff for TRTRS */ 1705 PetscScalar *trs_rhs; 1706 PetscBLASInt Blas_NRHS; 1707 /* pointers for values insertion into change of basis matrix */ 1708 PetscInt *start_rows,*start_cols; 1709 PetscScalar *start_vals; 1710 /* working stuff for values insertion */ 1711 PetscBT is_primal; 1712 1713 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1714 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1715 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1716 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1717 /* work arrays */ 1718 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1719 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1720 /* nonzeros per row */ 1721 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1722 if (PetscBTLookup(change_basis,i)) { 1723 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1724 if (PetscBTLookup(qr_needed_idx,i)) { 1725 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1726 } else { 1727 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1728 /* get local primal index on the cc */ 1729 j = 0; 1730 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1731 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1732 } 1733 } 1734 } 1735 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1736 ierr = PetscFree(nnz);CHKERRQ(ierr); 1737 /* Set initial identity in the matrix */ 1738 for (i=0;i<pcis->n_B;i++) { 1739 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1740 } 1741 1742 if (pcbddc->dbg_flag) { 1743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1744 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1745 } 1746 1747 1748 /* Now we loop on the constraints which need a change of basis */ 1749 /* 1750 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1751 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1752 1753 Basic blocks of change of basis matrix T computed 1754 1755 - Using the following block transformation if there is only a primal dof on the cc 1756 (in the example, primal dof is the last one of the edge in LOCAL ordering 1757 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1758 | 1 0 ... 0 1 | 1759 | 0 1 ... 0 1 | 1760 | ... | 1761 | 0 ... 1 1 | 1762 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1763 1764 - via QR decomposition of constraints otherwise 1765 */ 1766 if (qr_needed) { 1767 /* space to store Q */ 1768 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1769 /* first we issue queries for optimal work */ 1770 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1771 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1772 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1773 lqr_work = -1; 1774 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1775 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1776 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1777 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1778 lgqr_work = -1; 1779 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1780 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1781 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1782 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1783 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1784 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1785 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1786 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1787 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1788 /* array to store scaling factors for reflectors */ 1789 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1790 /* array to store rhs and solution of triangular solver */ 1791 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1792 /* allocating workspace for check */ 1793 if (pcbddc->dbg_flag) { 1794 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1795 } 1796 } 1797 /* array to store whether a node is primal or not */ 1798 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1799 for (i=0;i<total_primal_vertices;i++) { 1800 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1801 } 1802 1803 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1804 /* -> using implicit ordering contained in temp_indices data */ 1805 total_counts = pcbddc->n_vertices; 1806 primal_counter = total_counts; 1807 while (total_counts<pcbddc->local_primal_size) { 1808 primal_dofs = 1; 1809 if (PetscBTLookup(change_basis,total_counts)) { 1810 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1811 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1812 primal_dofs++; 1813 } 1814 /* get constraint info */ 1815 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1816 dual_dofs = size_of_constraint-primal_dofs; 1817 1818 if (pcbddc->dbg_flag) { 1819 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1820 } 1821 1822 if (primal_dofs > 1) { /* QR */ 1823 1824 /* copy quadrature constraints for change of basis check */ 1825 if (pcbddc->dbg_flag) { 1826 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1827 } 1828 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1829 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1830 1831 /* compute QR decomposition of constraints */ 1832 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1833 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1834 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1835 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1836 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1837 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1838 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1839 1840 /* explictly compute R^-T */ 1841 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1842 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1843 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1844 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1845 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1846 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1847 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1848 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1849 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1850 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1851 1852 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1853 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1854 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1855 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1856 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1857 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1858 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1859 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1860 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1861 1862 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1863 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1864 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1865 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1867 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1868 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1869 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1870 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1871 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1872 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1873 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1874 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1875 1876 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1877 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1878 /* insert cols for primal dofs */ 1879 for (j=0;j<primal_dofs;j++) { 1880 start_vals = &qr_basis[j*size_of_constraint]; 1881 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1882 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1883 } 1884 /* insert cols for dual dofs */ 1885 for (j=0,k=0;j<dual_dofs;k++) { 1886 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1887 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1888 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1889 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1890 j++; 1891 } 1892 } 1893 1894 /* check change of basis */ 1895 if (pcbddc->dbg_flag) { 1896 PetscInt ii,jj; 1897 PetscBool valid_qr=PETSC_TRUE; 1898 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1899 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1901 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1902 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1903 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1904 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1905 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1906 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1907 for (jj=0;jj<size_of_constraint;jj++) { 1908 for (ii=0;ii<primal_dofs;ii++) { 1909 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1910 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1911 } 1912 } 1913 if (!valid_qr) { 1914 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1915 for (jj=0;jj<size_of_constraint;jj++) { 1916 for (ii=0;ii<primal_dofs;ii++) { 1917 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1918 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1919 } 1920 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1921 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1922 } 1923 } 1924 } 1925 } else { 1926 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1927 } 1928 } 1929 } else { /* simple transformation block */ 1930 PetscInt row,col; 1931 PetscScalar val; 1932 for (j=0;j<size_of_constraint;j++) { 1933 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1934 if (!PetscBTLookup(is_primal,row)) { 1935 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1936 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1937 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1938 } else { 1939 for (k=0;k<size_of_constraint;k++) { 1940 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1941 if (row != col) { 1942 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1943 } else { 1944 val = 1.0; 1945 } 1946 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1947 } 1948 } 1949 } 1950 } 1951 /* increment primal counter */ 1952 primal_counter += primal_dofs; 1953 } else { 1954 if (pcbddc->dbg_flag) { 1955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1956 } 1957 } 1958 /* increment constraint counter total_counts */ 1959 total_counts += primal_dofs; 1960 } 1961 1962 /* free workspace */ 1963 if (qr_needed) { 1964 if (pcbddc->dbg_flag) { 1965 ierr = PetscFree(work);CHKERRQ(ierr); 1966 } 1967 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1968 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1969 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1970 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1971 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1972 } 1973 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1974 /* assembling */ 1975 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1976 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1977 /* 1978 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1979 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1980 */ 1981 } 1982 1983 /* flush dbg viewer */ 1984 if (pcbddc->dbg_flag) { 1985 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1986 } 1987 1988 /* free workspace */ 1989 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1990 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 1991 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1992 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1993 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1994 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1995 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1996 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1997 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1998 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2004 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2005 { 2006 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2007 PC_IS *pcis = (PC_IS*)pc->data; 2008 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2009 PetscInt bs,ierr,i,vertex_size; 2010 PetscViewer viewer=pcbddc->dbg_viewer; 2011 2012 PetscFunctionBegin; 2013 /* Reset previously computed graph */ 2014 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2015 /* Init local Graph struct */ 2016 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2017 2018 /* Check validity of the csr graph passed in by the user */ 2019 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2020 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2021 } 2022 2023 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2024 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2025 Mat mat_adj; 2026 const PetscInt *xadj,*adjncy; 2027 PetscBool flg_row=PETSC_TRUE; 2028 2029 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2030 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2031 if (!flg_row) { 2032 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2033 } 2034 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2035 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2036 if (!flg_row) { 2037 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2038 } 2039 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2040 } 2041 2042 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2043 vertex_size = 1; 2044 if (!pcbddc->user_provided_isfordofs) { 2045 if (!pcbddc->n_ISForDofs) { 2046 IS *custom_ISForDofs; 2047 2048 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2049 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2050 for (i=0;i<bs;i++) { 2051 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2052 } 2053 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2054 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2055 /* remove my references to IS objects */ 2056 for (i=0;i<bs;i++) { 2057 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2058 } 2059 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2060 } 2061 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2062 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2063 } 2064 2065 /* Setup of Graph */ 2066 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2067 2068 /* Graph's connected components analysis */ 2069 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2070 2071 /* print some info to stdout */ 2072 if (pcbddc->dbg_flag) { 2073 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2080 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2081 { 2082 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2083 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2084 PetscErrorCode ierr; 2085 2086 PetscFunctionBegin; 2087 n = 0; 2088 vertices = 0; 2089 if (pcbddc->ConstraintMatrix) { 2090 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2091 for (i=0;i<local_primal_size;i++) { 2092 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2093 if (size_of_constraint == 1) n++; 2094 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2095 } 2096 if (vertices_idx) { 2097 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2098 n = 0; 2099 for (i=0;i<local_primal_size;i++) { 2100 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2101 if (size_of_constraint == 1) { 2102 vertices[n++]=row_cmat_indices[0]; 2103 } 2104 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2105 } 2106 } 2107 } 2108 *n_vertices = n; 2109 if (vertices_idx) *vertices_idx = vertices; 2110 PetscFunctionReturn(0); 2111 } 2112 2113 #undef __FUNCT__ 2114 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2115 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2116 { 2117 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2118 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2119 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2120 PetscBT touched; 2121 PetscErrorCode ierr; 2122 2123 /* This function assumes that the number of local constraints per connected component 2124 is not greater than the number of nodes defined for the connected component 2125 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2126 PetscFunctionBegin; 2127 n = 0; 2128 constraints_index = 0; 2129 if (pcbddc->ConstraintMatrix) { 2130 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2131 max_size_of_constraint = 0; 2132 for (i=0;i<local_primal_size;i++) { 2133 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2134 if (size_of_constraint > 1) { 2135 n++; 2136 } 2137 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2138 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2139 } 2140 if (constraints_idx) { 2141 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2142 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2143 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2144 n = 0; 2145 for (i=0;i<local_primal_size;i++) { 2146 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2147 if (size_of_constraint > 1) { 2148 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2149 /* find first untouched local node */ 2150 j = 0; 2151 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2152 min_index = row_cmat_global_indices[j]; 2153 min_loc = j; 2154 /* search the minimum among nodes not yet touched on the connected component 2155 since there can be more than one constraint on a single cc */ 2156 for (j=1;j<size_of_constraint;j++) { 2157 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2158 min_index = row_cmat_global_indices[j]; 2159 min_loc = j; 2160 } 2161 } 2162 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2163 constraints_index[n++] = row_cmat_indices[min_loc]; 2164 } 2165 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2166 } 2167 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2168 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2169 } 2170 } 2171 *n_constraints = n; 2172 if (constraints_idx) *constraints_idx = constraints_index; 2173 PetscFunctionReturn(0); 2174 } 2175 2176 /* the next two functions has been adapted from pcis.c */ 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "PCBDDCApplySchur" 2179 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2180 { 2181 PetscErrorCode ierr; 2182 PC_IS *pcis = (PC_IS*)(pc->data); 2183 2184 PetscFunctionBegin; 2185 if (!vec2_B) { vec2_B = v; } 2186 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2187 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2188 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2189 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2190 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2191 PetscFunctionReturn(0); 2192 } 2193 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2196 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2197 { 2198 PetscErrorCode ierr; 2199 PC_IS *pcis = (PC_IS*)(pc->data); 2200 2201 PetscFunctionBegin; 2202 if (!vec2_B) { vec2_B = v; } 2203 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2204 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2205 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2206 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2207 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2208 PetscFunctionReturn(0); 2209 } 2210 2211 #undef __FUNCT__ 2212 #define __FUNCT__ "PCBDDCSubsetNumbering" 2213 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2214 { 2215 Vec local_vec,global_vec; 2216 IS seqis,paris; 2217 VecScatter scatter_ctx; 2218 PetscScalar *array; 2219 PetscInt *temp_global_dofs; 2220 PetscScalar globalsum; 2221 PetscInt i,j,s; 2222 PetscInt nlocals,first_index,old_index,max_local; 2223 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2224 PetscMPIInt *dof_sizes,*dof_displs; 2225 PetscBool first_found; 2226 PetscErrorCode ierr; 2227 2228 PetscFunctionBegin; 2229 /* mpi buffers */ 2230 MPI_Comm_size(comm,&size_prec_comm); 2231 MPI_Comm_rank(comm,&rank_prec_comm); 2232 j = ( !rank_prec_comm ? size_prec_comm : 0); 2233 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2234 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2235 /* get maximum size of subset */ 2236 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2237 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2238 max_local = 0; 2239 if (n_local_dofs) { 2240 max_local = temp_global_dofs[0]; 2241 for (i=1;i<n_local_dofs;i++) { 2242 if (max_local < temp_global_dofs[i] ) { 2243 max_local = temp_global_dofs[i]; 2244 } 2245 } 2246 } 2247 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2248 max_global++; 2249 max_local = 0; 2250 if (n_local_dofs) { 2251 max_local = local_dofs[0]; 2252 for (i=1;i<n_local_dofs;i++) { 2253 if (max_local < local_dofs[i] ) { 2254 max_local = local_dofs[i]; 2255 } 2256 } 2257 } 2258 max_local++; 2259 /* allocate workspace */ 2260 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2261 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2262 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2263 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2264 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2265 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2266 /* create scatter */ 2267 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2268 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2269 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2270 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2271 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2272 /* init array */ 2273 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2274 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2275 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2276 if (local_dofs_mult) { 2277 for (i=0;i<n_local_dofs;i++) { 2278 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2279 } 2280 } else { 2281 for (i=0;i<n_local_dofs;i++) { 2282 array[local_dofs[i]]=1.0; 2283 } 2284 } 2285 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2286 /* scatter into global vec and get total number of global dofs */ 2287 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2288 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2289 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2290 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2291 /* Fill global_vec with cumulative function for global numbering */ 2292 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2293 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2294 nlocals = 0; 2295 first_index = -1; 2296 first_found = PETSC_FALSE; 2297 for (i=0;i<s;i++) { 2298 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2299 first_found = PETSC_TRUE; 2300 first_index = i; 2301 } 2302 nlocals += (PetscInt)PetscRealPart(array[i]); 2303 } 2304 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2305 if (!rank_prec_comm) { 2306 dof_displs[0]=0; 2307 for (i=1;i<size_prec_comm;i++) { 2308 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2309 } 2310 } 2311 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2312 if (first_found) { 2313 array[first_index] += (PetscScalar)nlocals; 2314 old_index = first_index; 2315 for (i=first_index+1;i<s;i++) { 2316 if (PetscRealPart(array[i]) > 0.0) { 2317 array[i] += array[old_index]; 2318 old_index = i; 2319 } 2320 } 2321 } 2322 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2323 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2324 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2325 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2326 /* get global ordering of local dofs */ 2327 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2328 if (local_dofs_mult) { 2329 for (i=0;i<n_local_dofs;i++) { 2330 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2331 } 2332 } else { 2333 for (i=0;i<n_local_dofs;i++) { 2334 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2335 } 2336 } 2337 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2338 /* free workspace */ 2339 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2340 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2341 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2342 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2343 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2344 /* return pointer to global ordering of local dofs */ 2345 *global_numbering_subset = temp_global_dofs; 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2351 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2352 { 2353 PetscInt i,j; 2354 PetscScalar *alphas; 2355 PetscErrorCode ierr; 2356 2357 PetscFunctionBegin; 2358 /* this implements stabilized Gram-Schmidt */ 2359 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2360 for (i=0;i<n;i++) { 2361 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2362 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2363 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2364 } 2365 ierr = PetscFree(alphas);CHKERRQ(ierr); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatConvert_IS_AIJ" 2372 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2373 { 2374 Mat new_mat; 2375 Mat_IS *matis = (Mat_IS*)(mat->data); 2376 /* info on mat */ 2377 PetscInt bs,rows,cols; 2378 PetscInt lrows,lcols; 2379 PetscInt local_rows,local_cols; 2380 PetscMPIInt nsubdomains; 2381 /* preallocation */ 2382 Vec vec_dnz,vec_onz; 2383 PetscScalar *my_dnz,*my_onz,*array; 2384 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2385 PetscInt index_row,index_col,owner; 2386 PetscInt *local_indices,*global_indices; 2387 /* work */ 2388 PetscInt i,j; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 /* CHECKS */ 2393 /* get info from mat */ 2394 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2395 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2396 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2397 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2398 2399 /* MAT_INITIAL_MATRIX */ 2400 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2401 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2402 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2403 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2404 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2405 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2406 2407 /* 2408 preallocation 2409 */ 2410 2411 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2412 /* 2413 Some vectors are needed to sum up properly on shared interface dofs. 2414 Note that preallocation is not exact, since it overestimates nonzeros 2415 */ 2416 /* 2417 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2418 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2419 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2420 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2421 */ 2422 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2423 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2424 /* All processes need to compute entire row ownership */ 2425 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2426 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2427 for (i=0;i<nsubdomains;i++) { 2428 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2429 row_ownership[j]=i; 2430 } 2431 } 2432 /* map indices of local mat to global values */ 2433 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2434 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2435 for (i=0;i<local_rows;i++) local_indices[i]=i; 2436 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2437 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2438 2439 /* 2440 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2441 then, they will be summed up properly. This way, preallocation is always sufficient 2442 */ 2443 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2444 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2445 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2446 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2447 for (i=0;i<local_rows;i++) { 2448 index_row = global_indices[i]; 2449 for (j=i;j<local_rows;j++) { 2450 owner = row_ownership[index_row]; 2451 index_col = global_indices[j]; 2452 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2453 my_dnz[i] += 1.0; 2454 } else { /* offdiag block */ 2455 my_onz[i] += 1.0; 2456 } 2457 /* same as before, interchanging rows and cols */ 2458 if (i != j) { 2459 owner = row_ownership[index_col]; 2460 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2461 my_dnz[j] += 1.0; 2462 } else { 2463 my_onz[j] += 1.0; 2464 } 2465 } 2466 } 2467 } 2468 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2469 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2470 if (local_rows) { /* multilevel guard */ 2471 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2472 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2473 } 2474 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2475 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2476 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2477 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2478 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2479 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2480 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2481 2482 /* set computed preallocation in dnz and onz */ 2483 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2484 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2485 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2486 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2487 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2488 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2489 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2490 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2491 2492 /* Resize preallocation if overestimated */ 2493 for (i=0;i<lrows;i++) { 2494 dnz[i] = PetscMin(dnz[i],lcols); 2495 onz[i] = PetscMin(onz[i],cols-lcols); 2496 } 2497 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2498 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2499 2500 /* 2501 Set values. Very Basic. 2502 */ 2503 for (i=0;i<local_rows;i++) { 2504 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2505 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2506 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2507 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2508 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2509 } 2510 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2511 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2512 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2513 2514 /* get back mat */ 2515 *M = new_mat; 2516 PetscFunctionReturn(0); 2517 } 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "MatISSubassemble_Private" 2521 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2522 { 2523 Mat subdomain_adj; 2524 IS new_ranks,ranks_send_to; 2525 MatPartitioning partitioner; 2526 Mat_IS *matis; 2527 PetscInt n_neighs,*neighs,*n_shared,**shared; 2528 PetscInt prank; 2529 PetscMPIInt size,rank,color; 2530 PetscInt *xadj,*adjncy,*oldranks; 2531 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2532 PetscInt i,j,n_subdomains,local_size,threshold=0; 2533 PetscErrorCode ierr; 2534 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2535 PetscSubcomm subcomm; 2536 2537 PetscFunctionBegin; 2538 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2539 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2540 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2541 2542 /* Get info on mapping */ 2543 matis = (Mat_IS*)(mat->data); 2544 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2545 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2546 2547 /* build local CSR graph of subdomains' connectivity */ 2548 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2549 xadj[0] = 0; 2550 xadj[1] = PetscMax(n_neighs-1,0); 2551 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2552 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2553 2554 if (threshold) { 2555 PetscInt* count,min_threshold; 2556 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2557 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2558 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2559 for (j=0;j<n_shared[i];j++) { 2560 count[shared[i][j]] += 1; 2561 } 2562 } 2563 /* adapt threshold since we dont want to lose significant connections */ 2564 min_threshold = n_neighs; 2565 for (i=1;i<n_neighs;i++) { 2566 for (j=0;j<n_shared[i];j++) { 2567 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2568 } 2569 } 2570 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2571 threshold = PetscMax(min_threshold+1,threshold); 2572 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2573 xadj[1] = 0; 2574 for (i=1;i<n_neighs;i++) { 2575 for (j=0;j<n_shared[i];j++) { 2576 if (count[shared[i][j]] < threshold) { 2577 adjncy[xadj[1]] = neighs[i]; 2578 adjncy_wgt[xadj[1]] = n_shared[i]; 2579 xadj[1]++; 2580 break; 2581 } 2582 } 2583 } 2584 ierr = PetscFree(count);CHKERRQ(ierr); 2585 } else { 2586 if (xadj[1]) { 2587 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2588 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2589 } 2590 } 2591 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2592 if (use_square) { 2593 for (i=0;i<xadj[1];i++) { 2594 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2595 } 2596 } 2597 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2598 2599 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2600 2601 /* 2602 Restrict work on active processes only. 2603 */ 2604 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2605 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2606 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2607 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2608 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2609 if (color) { 2610 ierr = PetscFree(xadj);CHKERRQ(ierr); 2611 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2612 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2613 } else { 2614 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2615 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2616 prank = rank; 2617 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2618 for (i=0;i<size;i++) { 2619 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2620 } 2621 for (i=0;i<xadj[1];i++) { 2622 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2623 } 2624 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2625 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2626 n_subdomains = (PetscInt)size; 2627 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2628 2629 /* Partition */ 2630 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2631 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2632 if (use_vwgt) { 2633 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2634 v_wgt[0] = local_size; 2635 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2636 } 2637 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2638 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2639 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2640 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2641 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2642 2643 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2644 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2645 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2646 /* clean up */ 2647 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2648 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2649 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2650 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2651 } 2652 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2653 2654 /* assemble parallel IS for sends */ 2655 i = 1; 2656 if (color) i=0; 2657 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2658 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2659 2660 /* get back IS */ 2661 *is_sends = ranks_send_to; 2662 PetscFunctionReturn(0); 2663 } 2664 2665 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatISSubassemble" 2669 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2670 { 2671 Mat local_mat,new_mat; 2672 Mat_IS *matis; 2673 IS is_sends_internal; 2674 PetscInt rows,cols; 2675 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2676 PetscBool ismatis,isdense; 2677 ISLocalToGlobalMapping l2gmap; 2678 PetscInt* l2gmap_indices; 2679 const PetscInt* is_indices; 2680 MatType new_local_type; 2681 MatTypePrivate new_local_type_private; 2682 /* buffers */ 2683 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2684 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2685 /* MPI */ 2686 MPI_Comm comm; 2687 PetscMPIInt n_sends,n_recvs,commsize; 2688 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2689 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2690 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2691 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2692 PetscErrorCode ierr; 2693 2694 PetscFunctionBegin; 2695 /* checks */ 2696 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2697 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2698 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2699 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2700 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2701 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2702 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2703 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2704 PetscValidLogicalCollectiveInt(mat,bs,0); 2705 /* prepare IS for sending if not provided */ 2706 if (!is_sends) { 2707 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2708 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2709 } else { 2710 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2711 is_sends_internal = is_sends; 2712 } 2713 2714 /* get pointer of MATIS data */ 2715 matis = (Mat_IS*)mat->data; 2716 2717 /* get comm */ 2718 comm = PetscObjectComm((PetscObject)mat); 2719 2720 /* compute number of sends */ 2721 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2722 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2723 2724 /* compute number of receives */ 2725 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2726 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2727 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2728 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2729 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2730 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2731 ierr = PetscFree(iflags);CHKERRQ(ierr); 2732 2733 /* prepare send/receive buffers */ 2734 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2735 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2736 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2737 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2738 2739 /* Get data from local mat */ 2740 if (!isdense) { 2741 /* TODO: See below some guidelines on how to prepare the local buffers */ 2742 /* 2743 send_buffer_vals should contain the raw values of the local matrix 2744 send_buffer_idxs should contain: 2745 - MatType_PRIVATE type 2746 - PetscInt size_of_l2gmap 2747 - PetscInt global_row_indices[size_of_l2gmap] 2748 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2749 */ 2750 } else { 2751 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2752 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2753 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2754 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2755 send_buffer_idxs[1] = i; 2756 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2757 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2758 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2759 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2760 for (i=0;i<n_sends;i++) { 2761 ilengths_vals[is_indices[i]] = len*len; 2762 ilengths_idxs[is_indices[i]] = len+2; 2763 } 2764 } 2765 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2766 buf_size_idxs = 0; 2767 buf_size_vals = 0; 2768 for (i=0;i<n_recvs;i++) { 2769 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2770 buf_size_vals += (PetscInt)olengths_vals[i]; 2771 } 2772 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2773 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2774 2775 /* get new tags for clean communications */ 2776 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2777 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2778 2779 /* allocate for requests */ 2780 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2781 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2782 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2783 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2784 2785 /* communications */ 2786 ptr_idxs = recv_buffer_idxs; 2787 ptr_vals = recv_buffer_vals; 2788 for (i=0;i<n_recvs;i++) { 2789 source_dest = onodes[i]; 2790 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2791 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2792 ptr_idxs += olengths_idxs[i]; 2793 ptr_vals += olengths_vals[i]; 2794 } 2795 for (i=0;i<n_sends;i++) { 2796 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2797 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2798 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2799 } 2800 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2801 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2802 2803 /* assemble new l2g map */ 2804 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2805 ptr_idxs = recv_buffer_idxs; 2806 buf_size_idxs = 0; 2807 for (i=0;i<n_recvs;i++) { 2808 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2809 ptr_idxs += olengths_idxs[i]; 2810 } 2811 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2812 ptr_idxs = recv_buffer_idxs; 2813 buf_size_idxs = 0; 2814 for (i=0;i<n_recvs;i++) { 2815 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2816 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2817 ptr_idxs += olengths_idxs[i]; 2818 } 2819 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2820 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2821 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2822 2823 /* infer new local matrix type from received local matrices type */ 2824 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2825 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2826 new_local_type_private = MATAIJ_PRIVATE; 2827 new_local_type = MATSEQAIJ; 2828 if (n_recvs) { 2829 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2830 ptr_idxs = recv_buffer_idxs; 2831 for (i=0;i<n_recvs;i++) { 2832 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2833 new_local_type_private = MATAIJ_PRIVATE; 2834 break; 2835 } 2836 ptr_idxs += olengths_idxs[i]; 2837 } 2838 switch (new_local_type_private) { 2839 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2840 new_local_type = MATSEQAIJ; 2841 bs = 1; 2842 break; 2843 case MATAIJ_PRIVATE: 2844 new_local_type = MATSEQAIJ; 2845 bs = 1; 2846 break; 2847 case MATBAIJ_PRIVATE: 2848 new_local_type = MATSEQBAIJ; 2849 break; 2850 case MATSBAIJ_PRIVATE: 2851 new_local_type = MATSEQSBAIJ; 2852 break; 2853 default: 2854 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2855 break; 2856 } 2857 } 2858 2859 /* create MATIS object */ 2860 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2861 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2862 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2863 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2864 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2865 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2866 2867 /* set values */ 2868 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2869 ptr_vals = recv_buffer_vals; 2870 ptr_idxs = recv_buffer_idxs; 2871 for (i=0;i<n_recvs;i++) { 2872 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2873 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2874 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2875 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2876 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2877 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2878 } 2879 ptr_idxs += olengths_idxs[i]; 2880 ptr_vals += olengths_vals[i]; 2881 } 2882 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2883 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2884 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2885 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 2887 { /* check */ 2888 Vec lvec,rvec; 2889 PetscReal infty_error; 2890 2891 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2892 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2893 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2894 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2895 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2896 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2897 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2898 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2899 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2900 } 2901 2902 /* free workspace */ 2903 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2904 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2905 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2906 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2907 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2908 if (isdense) { 2909 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2910 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2911 } else { 2912 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2913 } 2914 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2915 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2916 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2917 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2918 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2919 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2920 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2921 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2922 ierr = PetscFree(onodes);CHKERRQ(ierr); 2923 /* get back new mat */ 2924 *mat_n = new_mat; 2925 PetscFunctionReturn(0); 2926 } 2927 2928 #undef __FUNCT__ 2929 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2930 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2931 { 2932 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2933 PC_IS *pcis = (PC_IS*)pc->data; 2934 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2935 MatNullSpace CoarseNullSpace=NULL; 2936 ISLocalToGlobalMapping coarse_islg; 2937 IS coarse_is; 2938 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2939 PetscInt im_active=-1,active_procs=-1; 2940 PC pc_temp; 2941 PCType coarse_pc_type; 2942 KSPType coarse_ksp_type; 2943 PetscBool multilevel_requested,multilevel_allowed; 2944 PetscBool setsym,issym,isbddc,isnn; 2945 MatStructure matstruct; 2946 PetscErrorCode ierr; 2947 2948 PetscFunctionBegin; 2949 /* Assign global numbering to coarse dofs */ 2950 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2951 2952 /* infer some info from user */ 2953 issym = PETSC_FALSE; 2954 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2955 multilevel_allowed = PETSC_FALSE; 2956 multilevel_requested = PETSC_FALSE; 2957 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2958 if (multilevel_requested) { 2959 /* count "active processes" */ 2960 im_active = !!(pcis->n); 2961 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2962 if (active_procs/pcbddc->coarsening_ratio < 2) { 2963 multilevel_allowed = PETSC_FALSE; 2964 } else { 2965 multilevel_allowed = PETSC_TRUE; 2966 } 2967 } 2968 2969 /* set defaults for coarse KSP and PC */ 2970 if (multilevel_allowed) { 2971 if (issym) { 2972 coarse_ksp_type = KSPRICHARDSON; 2973 } else { 2974 coarse_ksp_type = KSPCHEBYSHEV; 2975 } 2976 coarse_pc_type = PCBDDC; 2977 } else { 2978 coarse_ksp_type = KSPPREONLY; 2979 coarse_pc_type = PCREDUNDANT; 2980 } 2981 2982 /* create the coarse KSP object only once with defaults */ 2983 if (!pcbddc->coarse_ksp) { 2984 char prefix[256],str_level[3]; 2985 size_t len; 2986 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2987 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2988 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2989 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2990 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2991 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2992 /* prefix */ 2993 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 2994 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2995 if (!pcbddc->current_level) { 2996 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2997 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 2998 } else { 2999 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3000 if (pcbddc->current_level>1) len -= 2; 3001 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3002 *(prefix+len)='\0'; 3003 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3004 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3005 } 3006 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3007 } 3008 /* allow user customization */ 3009 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3010 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3011 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3012 3013 /* get some info after set from options */ 3014 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3015 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3016 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3017 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3018 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3019 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3020 isbddc = PETSC_FALSE; 3021 } 3022 3023 /* propagate BDDC info to the next level */ 3024 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3025 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3026 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3027 3028 /* creates temporary MATIS object for coarse matrix */ 3029 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3030 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3031 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3032 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3033 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3034 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3035 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3036 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3037 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3038 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3039 3040 /* assemble coarse matrix */ 3041 if (isbddc || isnn) { 3042 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3043 } else { 3044 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3045 } 3046 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3047 3048 /* create local to global scatters for coarse problem */ 3049 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3050 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3051 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3052 3053 /* propagate symmetry info to coarse matrix */ 3054 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3055 3056 /* Compute coarse null space (special handling by BDDC only) */ 3057 if (pcbddc->NullSpace) { 3058 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3059 if (isbddc) { 3060 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3061 } else { 3062 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3063 } 3064 } 3065 3066 /* set operators */ 3067 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3068 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3069 3070 /* additional KSP customization */ 3071 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3072 if (max_it < 5) { 3073 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3074 } 3075 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3076 3077 3078 /* print some info if requested */ 3079 if (pcbddc->dbg_flag) { 3080 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3081 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3082 if (!multilevel_allowed) { 3083 if (multilevel_requested) { 3084 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3085 } else if (pcbddc->max_levels) { 3086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3087 } 3088 } 3089 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3090 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3091 } 3092 3093 /* setup coarse ksp */ 3094 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3095 if (pcbddc->dbg_flag) { 3096 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3098 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3099 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3100 } 3101 3102 /* Check coarse problem if requested */ 3103 if (pcbddc->dbg_flag) { 3104 KSP check_ksp; 3105 KSPType check_ksp_type; 3106 PC check_pc; 3107 Vec check_vec; 3108 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3109 PetscInt its; 3110 PetscBool ispreonly,compute; 3111 3112 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3113 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3114 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3115 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3116 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3117 if (ispreonly) { 3118 check_ksp_type = KSPPREONLY; 3119 compute = PETSC_FALSE; 3120 } else { 3121 if (issym) check_ksp_type = KSPCG; 3122 else check_ksp_type = KSPGMRES; 3123 compute = PETSC_TRUE; 3124 } 3125 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3126 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3127 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3128 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3129 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3130 /* create random vec */ 3131 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3132 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3133 if (CoarseNullSpace) { 3134 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3135 } 3136 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3137 /* solve coarse problem */ 3138 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3139 if (CoarseNullSpace) { 3140 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3141 } 3142 /* check coarse problem residual error */ 3143 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3144 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3145 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3146 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3147 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3148 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3150 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3151 /* get eigenvalue estimation if preonly has not been requested */ 3152 if (!ispreonly) { 3153 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3154 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3155 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3156 } 3157 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3158 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3159 } 3160 /* free memory */ 3161 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3162 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3163 PetscFunctionReturn(0); 3164 } 3165 3166 #undef __FUNCT__ 3167 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3168 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3169 { 3170 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3171 PC_IS* pcis = (PC_IS*)pc->data; 3172 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3173 PetscInt i,j,coarse_size; 3174 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3175 PetscErrorCode ierr; 3176 3177 PetscFunctionBegin; 3178 /* get indices in local ordering for vertices and constraints */ 3179 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3180 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3181 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3182 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3183 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3184 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3185 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3186 3187 /* Compute global number of coarse dofs */ 3188 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3189 3190 /* check numbering */ 3191 if (pcbddc->dbg_flag) { 3192 PetscScalar coarsesum,*array; 3193 3194 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3195 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3196 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3197 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3198 for (i=0;i<pcbddc->local_primal_size;i++) { 3199 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3200 } 3201 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3202 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3203 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3204 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3205 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3206 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3207 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3208 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3209 for (i=0;i<pcis->n;i++) { 3210 if (array[i] == 1.0) { 3211 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3212 } 3213 } 3214 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3215 for (i=0;i<pcis->n;i++) { 3216 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3217 } 3218 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3219 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3220 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3221 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3222 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3223 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3224 if (pcbddc->dbg_flag > 1) { 3225 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3226 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3227 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3228 for (i=0;i<pcbddc->local_primal_size;i++) { 3229 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3230 } 3231 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3232 } 3233 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3234 } 3235 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3236 /* get back data */ 3237 *coarse_size_n = coarse_size; 3238 *local_primal_indices_n = local_primal_indices; 3239 PetscFunctionReturn(0); 3240 } 3241 3242