1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 pcbddc->n_ISForDofs = 0; 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "PCBDDCCreateWorkVectors" 111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 112 { 113 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 114 PC_IS *pcis = (PC_IS*)pc->data; 115 VecType impVecType; 116 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 117 PetscErrorCode ierr; 118 119 PetscFunctionBegin; 120 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 121 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 122 local_primal_size = n_constraints+n_vertices; 123 n_R = pcis->n-n_vertices; 124 /* local work vectors */ 125 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 126 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 127 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 128 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 129 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 130 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 131 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 132 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 133 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 134 if (n_constraints) { 135 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 136 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 137 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetUpCorrection" 144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 145 { 146 PetscErrorCode ierr; 147 /* pointers to pcis and pcbddc */ 148 PC_IS* pcis = (PC_IS*)pc->data; 149 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 150 /* submatrices of local problem */ 151 Mat A_RV,A_VR,A_VV; 152 /* working matrices */ 153 Mat M1,M2,M3,C_CR; 154 /* working vectors */ 155 Vec vec1_C,vec2_C,vec1_V,vec2_V; 156 /* additional working stuff */ 157 IS is_aux; 158 ISLocalToGlobalMapping BtoNmap; 159 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 160 const PetscScalar *array,*row_cmat_values; 161 const PetscInt *row_cmat_indices,*idx_R_local; 162 PetscInt *vertices,*idx_V_B,*auxindices; 163 PetscInt n_vertices,n_constraints,size_of_constraint; 164 PetscInt i,j,n_R,n_D,n_B; 165 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 166 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 184 /* Allocating some extra storage just to be safe */ 185 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 186 for (i=0;i<pcis->n;i++) auxindices[i]=i; 187 188 /* vertices in boundary numbering */ 189 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 190 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 191 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 193 if (i != n_vertices) { 194 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 195 } 196 197 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 198 if (n_constraints) { 199 /* work vectors */ 200 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 201 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 202 /* auxiliary matrices */ 203 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 204 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 205 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 206 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 207 208 /* Extract constraints on R nodes: C_{CR} */ 209 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 210 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 211 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 212 213 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 214 for (i=0;i<n_constraints;i++) { 215 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 216 /* Get row of constraint matrix in R numbering */ 217 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 218 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 219 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 220 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 221 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 222 /* Solve for row of constraint matrix in R numbering */ 223 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 224 /* Set values in local_auxmat2 */ 225 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 226 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 227 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 228 } 229 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 230 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 231 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 232 233 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 234 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 235 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 236 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 237 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 238 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 239 ierr = MatSetUp(M1);CHKERRQ(ierr); 240 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 241 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 242 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 243 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 244 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 245 ierr = MatDestroy(&M2);CHKERRQ(ierr); 246 ierr = MatDestroy(&M3);CHKERRQ(ierr); 247 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 248 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 249 } 250 251 /* Get submatrices from subdomain matrix */ 252 if (n_vertices) { 253 PetscInt ibs,mbs; 254 PetscBool issbaij; 255 Mat newmat; 256 257 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 258 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 259 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 260 if (ibs != mbs) { /* need to convert to SEQAIJ */ 261 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 262 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 263 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 265 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 266 } else { 267 /* this is safe */ 268 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 269 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 270 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 271 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 272 /* which of the two approaches is faster? */ 273 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 274 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 275 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 276 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 277 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 278 } else { 279 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 280 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 281 } 282 } 283 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 284 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 285 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 286 } 287 288 /* Matrix of coarse basis functions (local) */ 289 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 290 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 291 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 292 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 293 if (pcbddc->switch_static || pcbddc->dbg_flag) { 294 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 295 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 296 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 297 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 298 } 299 300 if (pcbddc->dbg_flag) { 301 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 302 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 303 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 304 } 305 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 306 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 307 308 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 309 310 /* vertices */ 311 for (i=0;i<n_vertices;i++) { 312 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 313 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 314 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 315 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 316 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 317 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 318 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 319 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 320 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 321 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 322 if (n_constraints) { 323 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 324 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 325 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 326 } 327 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 328 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 329 330 /* Set values in coarse basis function and subdomain part of coarse_mat */ 331 /* coarse basis functions */ 332 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 333 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 334 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 335 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 336 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 337 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 338 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 339 if (pcbddc->switch_static || pcbddc->dbg_flag) { 340 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 341 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 342 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 343 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 344 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 345 } 346 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 347 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 348 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 if (n_constraints) { 351 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 352 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 } 355 356 /* check */ 357 if (pcbddc->dbg_flag) { 358 /* assemble subdomain vector on local nodes */ 359 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 360 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 361 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 362 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 363 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 364 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 365 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 366 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 367 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 368 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 369 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 370 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 371 if (n_constraints) { 372 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 373 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 374 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 375 } 376 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 378 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 379 /* check saddle point solution */ 380 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 381 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 382 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 383 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 384 /* shift by the identity matrix */ 385 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 386 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 387 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 388 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 389 } 390 } 391 392 /* constraints */ 393 for (i=0;i<n_constraints;i++) { 394 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 395 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 396 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 397 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 398 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 399 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 400 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 401 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 402 if (n_vertices) { 403 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 404 } 405 /* Set values in coarse basis function and subdomain part of coarse_mat */ 406 /* coarse basis functions */ 407 j = i+n_vertices; /* don't touch this! */ 408 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 409 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 410 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 411 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 412 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 414 if (pcbddc->switch_static || pcbddc->dbg_flag) { 415 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 416 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 417 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 418 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 420 } 421 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 422 if (n_vertices) { 423 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 424 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 426 } 427 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 428 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 430 431 if (pcbddc->dbg_flag) { 432 /* assemble subdomain vector on nodes */ 433 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 434 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 437 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 439 /* assemble subdomain vector of lagrange multipliers */ 440 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 441 if (n_vertices) { 442 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 443 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 444 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 445 } 446 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 447 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 448 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 449 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 452 /* check saddle point solution */ 453 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 454 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 456 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 457 /* shift by the identity matrix */ 458 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 459 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 460 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 461 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 462 } 463 } 464 /* call assembling routines for local coarse basis */ 465 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 if (pcbddc->switch_static || pcbddc->dbg_flag) { 468 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 469 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 470 } 471 472 /* compute other basis functions for non-symmetric problems */ 473 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 474 if (!setsym || (setsym && !issym)) { 475 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 476 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 477 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 478 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 481 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 482 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 483 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 484 } 485 for (i=0;i<pcbddc->local_primal_size;i++) { 486 if (n_constraints) { 487 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 488 for (j=0;j<n_constraints;j++) { 489 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 490 } 491 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 492 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 493 } 494 if (i<n_vertices) { 495 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 496 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 499 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 500 if (n_constraints) { 501 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 502 } 503 } else { 504 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 505 } 506 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 507 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 508 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 509 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 510 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 511 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 512 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 513 if (i<n_vertices) { 514 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 515 } 516 if (pcbddc->switch_static || pcbddc->dbg_flag) { 517 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 518 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 519 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 520 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 521 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 522 } 523 524 if (pcbddc->dbg_flag) { 525 /* assemble subdomain vector on nodes */ 526 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 527 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 528 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 529 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 530 if (i<n_vertices) { 531 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 532 } 533 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 534 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 535 /* assemble subdomain vector of lagrange multipliers */ 536 for (j=0;j<pcbddc->local_primal_size;j++) { 537 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 538 } 539 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 540 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 541 /* check saddle point solution */ 542 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 543 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 544 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 545 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 546 /* shift by the identity matrix */ 547 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 548 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 549 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 550 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 551 } 552 } 553 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 if (pcbddc->switch_static || pcbddc->dbg_flag) { 556 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 } 559 unsymmetric_check = PETSC_TRUE; 560 } else { /* take references to already computed coarse basis */ 561 unsymmetric_check = PETSC_FALSE; 562 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 563 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 564 if (pcbddc->coarse_phi_D) { 565 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 566 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 567 } 568 } 569 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 570 /* Checking coarse_sub_mat and coarse basis functios */ 571 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 572 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 573 if (pcbddc->dbg_flag) { 574 Mat coarse_sub_mat; 575 Mat AUXMAT,TM1,TM2,TM3,TM4; 576 Mat coarse_phi_D,coarse_phi_B; 577 Mat coarse_psi_D,coarse_psi_B; 578 Mat A_II,A_BB,A_IB,A_BI; 579 MatType checkmattype=MATSEQAIJ; 580 PetscReal real_value; 581 582 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 583 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 584 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 585 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 586 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 587 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 588 if (unsymmetric_check) { 589 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 590 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 591 } 592 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 593 594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 596 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 597 if (unsymmetric_check) { 598 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 599 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 600 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 601 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 602 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 603 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 604 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 605 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 607 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 608 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 609 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 610 } else { 611 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 612 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 613 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 614 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 615 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 616 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 617 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 618 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 619 } 620 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 621 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 622 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 623 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 624 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 625 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 630 for (i=0;i<pcbddc->local_primal_size;i++) { 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 632 } 633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 634 for (i=0;i<pcbddc->local_primal_size;i++) { 635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 636 } 637 if (unsymmetric_check) { 638 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 639 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 641 } 642 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 643 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 644 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 645 } 646 } 647 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 648 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 649 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 650 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 651 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 652 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 653 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 654 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 655 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 656 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 657 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 658 if (unsymmetric_check) { 659 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 660 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 661 } 662 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 663 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 664 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 665 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 666 } 667 /* free memory */ 668 if (n_vertices) { 669 ierr = PetscFree(vertices);CHKERRQ(ierr); 670 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 671 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 672 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 673 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 674 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 675 } 676 if (n_constraints) { 677 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 678 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 679 ierr = MatDestroy(&M1);CHKERRQ(ierr); 680 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 681 } 682 ierr = PetscFree(auxindices);CHKERRQ(ierr); 683 /* get back data */ 684 *coarse_submat_vals_n = coarse_submat_vals; 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 690 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 691 { 692 PC_IS* pcis = (PC_IS*)(pc->data); 693 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 694 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 695 PetscBool issbaij,isbaij; 696 /* manage repeated solves */ 697 MatReuse reuse; 698 MatStructure matstruct; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 /* get mat flags */ 703 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 704 reuse = MAT_INITIAL_MATRIX; 705 if (pc->setupcalled) { 706 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 707 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 708 if (matstruct == SAME_NONZERO_PATTERN) { 709 reuse = MAT_REUSE_MATRIX; 710 } else { 711 reuse = MAT_INITIAL_MATRIX; 712 } 713 } 714 if (reuse == MAT_INITIAL_MATRIX) { 715 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 717 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 720 } 721 722 /* transform local matrices if needed */ 723 if (pcbddc->use_change_of_basis) { 724 Mat change_mat_all; 725 PetscScalar *row_cmat_values; 726 PetscInt *row_cmat_indices; 727 PetscInt *nnz,*is_indices,*temp_indices; 728 PetscInt i,j,k,n_D,n_B; 729 730 /* Get Non-overlapping dimensions */ 731 n_B = pcis->n_B; 732 n_D = pcis->n-n_B; 733 734 /* compute nonzero structure of change of basis on all local nodes */ 735 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 736 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 737 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 738 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 739 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 740 k=1; 741 for (i=0;i<n_B;i++) { 742 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 743 nnz[is_indices[i]]=j; 744 if (k < j) k = j; 745 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 746 } 747 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 748 /* assemble change of basis matrix on the whole set of local dofs */ 749 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 750 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 751 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 752 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 753 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 754 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 755 for (i=0;i<n_D;i++) { 756 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 757 } 758 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 759 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 760 for (i=0;i<n_B;i++) { 761 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 762 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 763 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 764 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 765 } 766 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 768 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 769 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 770 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 771 if (!issbaij && !isbaij) { 772 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 773 } else { 774 Mat work_mat; 775 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 776 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 777 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 778 } 779 /* 780 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 781 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 782 */ 783 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 784 ierr = PetscFree(nnz);CHKERRQ(ierr); 785 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 786 } else { 787 /* without change of basis, the local matrix is unchanged */ 788 if (!pcbddc->local_mat) { 789 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 790 pcbddc->local_mat = matis->A; 791 } 792 } 793 794 /* get submatrices */ 795 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 796 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 797 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 798 if (!issbaij) { 799 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 800 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 801 } else { 802 Mat newmat; 803 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 804 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 805 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 806 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 807 } 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 813 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 814 { 815 PC_IS* pcis = (PC_IS*)(pc->data); 816 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 817 IS is_aux1,is_aux2; 818 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 819 PetscInt n_vertices,i,j,n_R,n_D,n_B; 820 PetscInt vbs,bs; 821 PetscBT bitmask; 822 PetscErrorCode ierr; 823 824 PetscFunctionBegin; 825 /* Set Non-overlapping dimensions */ 826 n_B = pcis->n_B; n_D = pcis->n - n_B; 827 /* get vertex indices from constraint matrix */ 828 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 829 830 /* create auxiliary bitmask */ 831 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 832 for (i=0;i<n_vertices;i++) { 833 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 834 } 835 836 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 837 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 838 for (i=0, n_R=0; i<pcis->n; i++) { 839 if (!PetscBTLookup(bitmask,i)) { 840 idx_R_local[n_R] = i; 841 n_R++; 842 } 843 } 844 845 /* Block code */ 846 vbs = 1; 847 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 848 if (bs>1 && !(n_vertices%bs)) { 849 PetscBool is_blocked = PETSC_TRUE; 850 PetscInt *vary; 851 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 852 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 853 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 854 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 855 for (i=0; i<n_vertices; i++) { 856 if (vary[i]!=0 && vary[i]!=bs) { 857 is_blocked = PETSC_FALSE; 858 break; 859 } 860 } 861 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 862 vbs = bs; 863 for (i=0;i<n_R/vbs;i++) { 864 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 865 } 866 } 867 ierr = PetscFree(vary);CHKERRQ(ierr); 868 } 869 ierr = PetscFree(vertices);CHKERRQ(ierr); 870 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 871 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 872 873 /* print some info if requested */ 874 if (pcbddc->dbg_flag) { 875 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 876 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 878 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 880 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 881 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 882 } 883 884 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 885 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 886 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 887 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 888 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 889 for (i=0; i<n_D; i++) { 890 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 891 } 892 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 893 for (i=0, j=0; i<n_R; i++) { 894 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 895 aux_array1[j++] = i; 896 } 897 } 898 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 899 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 900 for (i=0, j=0; i<n_B; i++) { 901 if (!PetscBTLookup(bitmask,is_indices[i])) { 902 aux_array2[j++] = i; 903 } 904 } 905 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 906 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 907 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 908 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 909 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 910 911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 912 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 913 for (i=0, j=0; i<n_R; i++) { 914 if (PetscBTLookup(bitmask,idx_R_local[i])) { 915 aux_array1[j++] = i; 916 } 917 } 918 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 919 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 920 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 921 } 922 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 923 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } 926 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 930 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 931 { 932 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 933 PC_IS *pcis = (PC_IS*)pc->data; 934 PC pc_temp; 935 Mat A_RR; 936 Vec vec1,vec2,vec3; 937 MatStructure matstruct; 938 PetscScalar m_one = -1.0; 939 PetscReal value; 940 PetscInt n_D,n_R,ibs,mbs; 941 PetscBool use_exact,use_exact_reduced,issbaij; 942 PetscErrorCode ierr; 943 /* prefixes stuff */ 944 char dir_prefix[256],neu_prefix[256],str_level[3]; 945 size_t len; 946 947 PetscFunctionBegin; 948 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 949 950 /* compute prefixes */ 951 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 952 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 953 if (!pcbddc->current_level) { 954 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 955 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 956 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 957 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 958 } else { 959 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 960 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 961 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 962 len -= 15; /* remove "pc_bddc_coarse_" */ 963 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 964 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 965 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 966 *(dir_prefix+len)='\0'; 967 *(neu_prefix+len)='\0'; 968 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 969 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 970 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 971 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 972 } 973 974 /* DIRICHLET PROBLEM */ 975 /* Matrix for Dirichlet problem is pcis->A_II */ 976 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 977 if (!pcbddc->ksp_D) { /* create object if not yet build */ 978 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 979 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 980 /* default */ 981 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 982 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 983 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 984 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 985 if (issbaij) { 986 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 987 } else { 988 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 989 } 990 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 991 } 992 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 993 /* Allow user's customization */ 994 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 995 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 996 if (!n_D) { 997 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 998 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 999 } 1000 /* Set Up KSP for Dirichlet problem of BDDC */ 1001 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1002 /* set ksp_D into pcis data */ 1003 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1004 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1005 pcis->ksp_D = pcbddc->ksp_D; 1006 1007 /* NEUMANN PROBLEM */ 1008 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1009 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1010 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1011 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1012 if (ibs != mbs) { 1013 Mat newmat; 1014 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1015 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1016 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1017 } else { 1018 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1019 } 1020 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1021 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1022 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1023 /* default */ 1024 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1025 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1026 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1027 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1028 if (issbaij) { 1029 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1030 } else { 1031 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1032 } 1033 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1034 } 1035 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1036 /* Allow user's customization */ 1037 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1038 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1039 if (!n_R) { 1040 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1041 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1042 } 1043 /* Set Up KSP for Neumann problem of BDDC */ 1044 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1045 1046 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1047 1048 /* Dirichlet */ 1049 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1050 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1051 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1052 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1053 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1054 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1055 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1056 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1057 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1058 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1059 /* need to be adapted? */ 1060 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1061 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1062 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1063 /* print info */ 1064 if (pcbddc->dbg_flag) { 1065 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1070 } 1071 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1072 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1073 } 1074 1075 /* Neumann */ 1076 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1077 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1078 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1079 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1080 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1081 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1082 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1083 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1084 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1085 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1086 /* need to be adapted? */ 1087 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1088 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1089 /* print info */ 1090 if (pcbddc->dbg_flag) { 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1093 } 1094 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1095 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1096 } 1097 /* free Neumann problem's matrix */ 1098 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1104 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1105 { 1106 PetscErrorCode ierr; 1107 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1108 1109 PetscFunctionBegin; 1110 if (applytranspose) { 1111 if (pcbddc->local_auxmat1) { 1112 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1113 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1114 } 1115 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1116 } else { 1117 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1118 if (pcbddc->local_auxmat1) { 1119 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1120 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1121 } 1122 } 1123 PetscFunctionReturn(0); 1124 } 1125 1126 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1129 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1130 { 1131 PetscErrorCode ierr; 1132 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1133 PC_IS* pcis = (PC_IS*) (pc->data); 1134 const PetscScalar zero = 0.0; 1135 1136 PetscFunctionBegin; 1137 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1138 if (applytranspose) { 1139 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1140 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1141 } else { 1142 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1143 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1144 } 1145 /* Scatter data of coarse_rhs */ 1146 ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 1147 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1148 1149 /* Local solution on R nodes */ 1150 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1151 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1152 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1153 if (pcbddc->switch_static) { 1154 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1156 } 1157 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1158 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1159 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1160 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1161 if (pcbddc->switch_static) { 1162 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1163 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1164 } 1165 1166 /* Coarse solution */ 1167 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1168 /* TODO remove null space when doing multilevel */ 1169 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1170 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1171 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 1173 /* Sum contributions from two levels */ 1174 if (applytranspose) { 1175 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1176 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1177 } else { 1178 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1179 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1180 } 1181 PetscFunctionReturn(0); 1182 } 1183 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1186 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1187 { 1188 PetscErrorCode ierr; 1189 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1190 1191 PetscFunctionBegin; 1192 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1198 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1199 { 1200 PetscErrorCode ierr; 1201 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1202 1203 PetscFunctionBegin; 1204 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 /* uncomment for testing purposes */ 1209 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1212 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1213 { 1214 PetscErrorCode ierr; 1215 PC_IS* pcis = (PC_IS*)(pc->data); 1216 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1217 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1218 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1219 MatType impMatType=MATSEQAIJ; 1220 /* one and zero */ 1221 PetscScalar one=1.0,zero=0.0; 1222 /* space to store constraints and their local indices */ 1223 PetscScalar *temp_quadrature_constraint; 1224 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1225 /* iterators */ 1226 PetscInt i,j,k,total_counts,temp_start_ptr; 1227 /* stuff to store connected components stored in pcbddc->mat_graph */ 1228 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1229 PetscInt n_ISForFaces,n_ISForEdges; 1230 /* near null space stuff */ 1231 MatNullSpace nearnullsp; 1232 const Vec *nearnullvecs; 1233 Vec *localnearnullsp; 1234 PetscBool nnsp_has_cnst; 1235 PetscInt nnsp_size; 1236 PetscScalar *array; 1237 /* BLAS integers */ 1238 PetscBLASInt lwork,lierr; 1239 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1240 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1241 /* LAPACK working arrays for SVD or POD */ 1242 PetscBool skip_lapack; 1243 PetscScalar *work; 1244 PetscReal *singular_vals; 1245 #if defined(PETSC_USE_COMPLEX) 1246 PetscReal *rwork; 1247 #endif 1248 #if defined(PETSC_MISSING_LAPACK_GESVD) 1249 PetscBLASInt Blas_one_2=1; 1250 PetscScalar *temp_basis,*correlation_mat; 1251 #else 1252 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1253 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1254 #endif 1255 /* change of basis */ 1256 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1257 PetscBool boolforchange,qr_needed; 1258 PetscBT touched,change_basis,qr_needed_idx; 1259 /* auxiliary stuff */ 1260 PetscInt *nnz,*is_indices,*local_to_B; 1261 /* some quantities */ 1262 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1263 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1264 1265 1266 PetscFunctionBegin; 1267 /* Get index sets for faces, edges and vertices from graph */ 1268 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1269 pcbddc->use_vertices = PETSC_TRUE; 1270 } 1271 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1272 /* HACK: provide functions to set change of basis */ 1273 if (!ISForVertices && pcbddc->NullSpace) { 1274 pcbddc->use_change_of_basis = PETSC_TRUE; 1275 pcbddc->use_change_on_faces = PETSC_FALSE; 1276 } 1277 /* print some info */ 1278 if (pcbddc->dbg_flag) { 1279 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1280 i = 0; 1281 if (ISForVertices) { 1282 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1283 } 1284 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1287 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1288 } 1289 /* check if near null space is attached to global mat */ 1290 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1291 if (nearnullsp) { 1292 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1293 } else { /* if near null space is not provided BDDC uses constants by default */ 1294 nnsp_size = 0; 1295 nnsp_has_cnst = PETSC_TRUE; 1296 } 1297 /* get max number of constraints on a single cc */ 1298 max_constraints = nnsp_size; 1299 if (nnsp_has_cnst) max_constraints++; 1300 1301 /* 1302 Evaluate maximum storage size needed by the procedure 1303 - temp_indices will contain start index of each constraint stored as follows 1304 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1305 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1306 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1307 */ 1308 total_counts = n_ISForFaces+n_ISForEdges; 1309 total_counts *= max_constraints; 1310 n_vertices = 0; 1311 if (ISForVertices) { 1312 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1313 } 1314 total_counts += n_vertices; 1315 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1316 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1317 total_counts = 0; 1318 max_size_of_constraint = 0; 1319 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1320 if (i<n_ISForEdges) { 1321 used_IS = &ISForEdges[i]; 1322 } else { 1323 used_IS = &ISForFaces[i-n_ISForEdges]; 1324 } 1325 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1326 total_counts += j; 1327 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1328 } 1329 total_counts *= max_constraints; 1330 total_counts += n_vertices; 1331 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1332 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1333 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1334 /* local to boundary numbering */ 1335 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1336 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1337 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1338 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1339 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1340 /* get local part of global near null space vectors */ 1341 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1342 for (k=0;k<nnsp_size;k++) { 1343 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1344 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1345 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1346 } 1347 1348 /* whether or not to skip lapack calls */ 1349 skip_lapack = PETSC_TRUE; 1350 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1351 1352 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1353 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1354 PetscScalar temp_work; 1355 #if defined(PETSC_MISSING_LAPACK_GESVD) 1356 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1357 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1358 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1359 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1360 #if defined(PETSC_USE_COMPLEX) 1361 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1362 #endif 1363 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1364 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1365 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1366 lwork = -1; 1367 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1368 #if !defined(PETSC_USE_COMPLEX) 1369 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1370 #else 1371 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1372 #endif 1373 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1374 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1375 #else /* on missing GESVD */ 1376 /* SVD */ 1377 PetscInt max_n,min_n; 1378 max_n = max_size_of_constraint; 1379 min_n = max_constraints; 1380 if (max_size_of_constraint < max_constraints) { 1381 min_n = max_size_of_constraint; 1382 max_n = max_constraints; 1383 } 1384 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1385 #if defined(PETSC_USE_COMPLEX) 1386 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1387 #endif 1388 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1389 lwork = -1; 1390 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1391 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1392 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1393 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1394 #if !defined(PETSC_USE_COMPLEX) 1395 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1396 #else 1397 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1398 #endif 1399 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1400 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1401 #endif /* on missing GESVD */ 1402 /* Allocate optimal workspace */ 1403 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1404 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1405 } 1406 /* Now we can loop on constraining sets */ 1407 total_counts = 0; 1408 temp_indices[0] = 0; 1409 /* vertices */ 1410 if (ISForVertices) { 1411 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1412 if (nnsp_has_cnst) { /* consider all vertices */ 1413 for (i=0;i<n_vertices;i++) { 1414 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1415 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1416 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1417 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1418 total_counts++; 1419 } 1420 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1421 PetscBool used_vertex; 1422 for (i=0;i<n_vertices;i++) { 1423 used_vertex = PETSC_FALSE; 1424 k = 0; 1425 while (!used_vertex && k<nnsp_size) { 1426 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1427 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1428 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1429 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1430 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1431 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1432 total_counts++; 1433 used_vertex = PETSC_TRUE; 1434 } 1435 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1436 k++; 1437 } 1438 } 1439 } 1440 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1441 n_vertices = total_counts; 1442 } 1443 1444 /* edges and faces */ 1445 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1446 if (i<n_ISForEdges) { 1447 used_IS = &ISForEdges[i]; 1448 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1449 } else { 1450 used_IS = &ISForFaces[i-n_ISForEdges]; 1451 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1452 } 1453 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1454 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1455 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1456 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1457 /* change of basis should not be performed on local periodic nodes */ 1458 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1459 if (nnsp_has_cnst) { 1460 PetscScalar quad_value; 1461 temp_constraints++; 1462 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1463 for (j=0;j<size_of_constraint;j++) { 1464 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1465 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1466 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1467 } 1468 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1469 total_counts++; 1470 } 1471 for (k=0;k<nnsp_size;k++) { 1472 PetscReal real_value; 1473 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1474 for (j=0;j<size_of_constraint;j++) { 1475 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1476 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1477 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1478 } 1479 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1480 /* check if array is null on the connected component */ 1481 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1482 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1483 if (real_value > 0.0) { /* keep indices and values */ 1484 temp_constraints++; 1485 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1486 total_counts++; 1487 } 1488 } 1489 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1490 valid_constraints = temp_constraints; 1491 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1492 if (!pcbddc->use_nnsp_true && temp_constraints) { 1493 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1494 1495 #if defined(PETSC_MISSING_LAPACK_GESVD) 1496 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1497 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1498 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1499 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1500 from that computed using LAPACKgesvd 1501 -> This is due to a different computation of eigenvectors in LAPACKheev 1502 -> The quality of the POD-computed basis will be the same */ 1503 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1504 /* Store upper triangular part of correlation matrix */ 1505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1506 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1507 for (j=0;j<temp_constraints;j++) { 1508 for (k=0;k<j+1;k++) { 1509 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1510 } 1511 } 1512 /* compute eigenvalues and eigenvectors of correlation matrix */ 1513 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1514 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1515 #if !defined(PETSC_USE_COMPLEX) 1516 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1517 #else 1518 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1519 #endif 1520 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1521 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1522 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1523 j=0; 1524 while (j < temp_constraints && singular_vals[j] < tol) j++; 1525 total_counts=total_counts-j; 1526 valid_constraints = temp_constraints-j; 1527 /* scale and copy POD basis into used quadrature memory */ 1528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1529 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1530 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1531 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1532 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1534 if (j<temp_constraints) { 1535 PetscInt ii; 1536 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1538 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1539 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1540 for (k=0;k<temp_constraints-j;k++) { 1541 for (ii=0;ii<size_of_constraint;ii++) { 1542 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1543 } 1544 } 1545 } 1546 #else /* on missing GESVD */ 1547 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1548 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1549 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1550 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1551 #if !defined(PETSC_USE_COMPLEX) 1552 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1553 #else 1554 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1555 #endif 1556 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1557 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1558 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1559 k = temp_constraints; 1560 if (k > size_of_constraint) k = size_of_constraint; 1561 j = 0; 1562 while (j < k && singular_vals[k-j-1] < tol) j++; 1563 total_counts = total_counts-temp_constraints+k-j; 1564 valid_constraints = k-j; 1565 #endif /* on missing GESVD */ 1566 } 1567 /* setting change_of_basis flag is safe now */ 1568 if (boolforchange) { 1569 for (j=0;j<valid_constraints;j++) { 1570 PetscBTSet(change_basis,total_counts-j-1); 1571 } 1572 } 1573 } 1574 /* free index sets of faces, edges and vertices */ 1575 for (i=0;i<n_ISForFaces;i++) { 1576 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1577 } 1578 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1579 for (i=0;i<n_ISForEdges;i++) { 1580 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1581 } 1582 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1583 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1584 1585 /* free workspace */ 1586 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1587 ierr = PetscFree(work);CHKERRQ(ierr); 1588 #if defined(PETSC_USE_COMPLEX) 1589 ierr = PetscFree(rwork);CHKERRQ(ierr); 1590 #endif 1591 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1592 #if defined(PETSC_MISSING_LAPACK_GESVD) 1593 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1594 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1595 #endif 1596 } 1597 for (k=0;k<nnsp_size;k++) { 1598 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1599 } 1600 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1601 1602 /* set quantities in pcbddc data structure */ 1603 /* n_vertices defines the number of subdomain corners in the primal space */ 1604 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1605 pcbddc->local_primal_size = total_counts; 1606 pcbddc->n_vertices = n_vertices; 1607 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1608 1609 /* Create constraint matrix */ 1610 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1611 /* so we need to set it up properly either with or without change of basis */ 1612 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1613 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1614 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1615 /* array to compute a local numbering of constraints : vertices first then constraints */ 1616 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1617 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1618 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1619 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1620 /* auxiliary stuff for basis change */ 1621 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1622 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1623 1624 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1625 total_primal_vertices=0; 1626 for (i=0;i<pcbddc->local_primal_size;i++) { 1627 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1628 if (size_of_constraint == 1) { 1629 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1630 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1631 aux_primal_minloc[total_primal_vertices]=0; 1632 total_primal_vertices++; 1633 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1634 PetscInt min_loc,min_index; 1635 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1636 /* find first untouched local node */ 1637 k = 0; 1638 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1639 min_index = global_indices[k]; 1640 min_loc = k; 1641 /* search the minimum among global nodes already untouched on the cc */ 1642 for (k=1;k<size_of_constraint;k++) { 1643 /* there can be more than one constraint on a single connected component */ 1644 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1645 min_index = global_indices[k]; 1646 min_loc = k; 1647 } 1648 } 1649 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1650 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1651 aux_primal_minloc[total_primal_vertices]=min_loc; 1652 total_primal_vertices++; 1653 } 1654 } 1655 /* determine if a QR strategy is needed for change of basis */ 1656 qr_needed = PETSC_FALSE; 1657 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1658 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1659 if (PetscBTLookup(change_basis,i)) { 1660 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1661 j = 0; 1662 for (k=0;k<size_of_constraint;k++) { 1663 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1664 j++; 1665 } 1666 } 1667 /* found more than one primal dof on the cc */ 1668 if (j > 1) { 1669 PetscBTSet(qr_needed_idx,i); 1670 qr_needed = PETSC_TRUE; 1671 } 1672 } 1673 } 1674 /* free workspace */ 1675 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1676 1677 /* permute indices in order to have a sorted set of vertices */ 1678 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1679 1680 /* nonzero structure of constraint matrix */ 1681 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1682 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1683 j=total_primal_vertices; 1684 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1685 if (!PetscBTLookup(change_basis,i)) { 1686 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1687 j++; 1688 } 1689 } 1690 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1691 ierr = PetscFree(nnz);CHKERRQ(ierr); 1692 /* set values in constraint matrix */ 1693 for (i=0;i<total_primal_vertices;i++) { 1694 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1695 } 1696 total_counts = total_primal_vertices; 1697 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1698 if (!PetscBTLookup(change_basis,i)) { 1699 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1700 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1701 total_counts++; 1702 } 1703 } 1704 /* assembling */ 1705 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1706 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1707 /* 1708 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1709 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1710 */ 1711 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1712 if (pcbddc->use_change_of_basis) { 1713 /* dual and primal dofs on a single cc */ 1714 PetscInt dual_dofs,primal_dofs; 1715 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1716 PetscInt primal_counter; 1717 /* working stuff for GEQRF */ 1718 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1719 PetscBLASInt lqr_work; 1720 /* working stuff for UNGQR */ 1721 PetscScalar *gqr_work,lgqr_work_t; 1722 PetscBLASInt lgqr_work; 1723 /* working stuff for TRTRS */ 1724 PetscScalar *trs_rhs; 1725 PetscBLASInt Blas_NRHS; 1726 /* pointers for values insertion into change of basis matrix */ 1727 PetscInt *start_rows,*start_cols; 1728 PetscScalar *start_vals; 1729 /* working stuff for values insertion */ 1730 PetscBT is_primal; 1731 1732 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1733 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1734 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1735 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1736 /* work arrays */ 1737 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1738 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1739 /* nonzeros per row */ 1740 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1741 if (PetscBTLookup(change_basis,i)) { 1742 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1743 if (PetscBTLookup(qr_needed_idx,i)) { 1744 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1745 } else { 1746 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1747 /* get local primal index on the cc */ 1748 j = 0; 1749 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1750 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1751 } 1752 } 1753 } 1754 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1755 ierr = PetscFree(nnz);CHKERRQ(ierr); 1756 /* Set initial identity in the matrix */ 1757 for (i=0;i<pcis->n_B;i++) { 1758 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1759 } 1760 1761 if (pcbddc->dbg_flag) { 1762 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1763 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1764 } 1765 1766 1767 /* Now we loop on the constraints which need a change of basis */ 1768 /* 1769 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1770 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1771 1772 Basic blocks of change of basis matrix T computed 1773 1774 - Using the following block transformation if there is only a primal dof on the cc 1775 (in the example, primal dof is the last one of the edge in LOCAL ordering 1776 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1777 | 1 0 ... 0 1 | 1778 | 0 1 ... 0 1 | 1779 | ... | 1780 | 0 ... 1 1 | 1781 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1782 1783 - via QR decomposition of constraints otherwise 1784 */ 1785 if (qr_needed) { 1786 /* space to store Q */ 1787 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1788 /* first we issue queries for optimal work */ 1789 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1790 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1791 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1792 lqr_work = -1; 1793 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1794 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1795 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1796 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1797 lgqr_work = -1; 1798 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1799 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1800 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1801 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1802 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1803 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1804 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1805 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1806 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1807 /* array to store scaling factors for reflectors */ 1808 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1809 /* array to store rhs and solution of triangular solver */ 1810 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1811 /* allocating workspace for check */ 1812 if (pcbddc->dbg_flag) { 1813 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1814 } 1815 } 1816 /* array to store whether a node is primal or not */ 1817 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1818 for (i=0;i<total_primal_vertices;i++) { 1819 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1820 } 1821 1822 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1823 /* -> using implicit ordering contained in temp_indices data */ 1824 total_counts = pcbddc->n_vertices; 1825 primal_counter = total_counts; 1826 while (total_counts<pcbddc->local_primal_size) { 1827 primal_dofs = 1; 1828 if (PetscBTLookup(change_basis,total_counts)) { 1829 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1830 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1831 primal_dofs++; 1832 } 1833 /* get constraint info */ 1834 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1835 dual_dofs = size_of_constraint-primal_dofs; 1836 1837 if (pcbddc->dbg_flag) { 1838 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1839 } 1840 1841 if (primal_dofs > 1) { /* QR */ 1842 1843 /* copy quadrature constraints for change of basis check */ 1844 if (pcbddc->dbg_flag) { 1845 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1846 } 1847 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1848 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1849 1850 /* compute QR decomposition of constraints */ 1851 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1852 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1853 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1854 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1855 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1856 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1857 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1858 1859 /* explictly compute R^-T */ 1860 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1861 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1862 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1863 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1864 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1865 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1866 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1867 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1868 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1869 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1870 1871 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1872 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1873 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1874 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1875 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1876 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1877 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1878 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1879 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1880 1881 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1882 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1883 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1884 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1885 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1886 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1887 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1888 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1889 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1890 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1891 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1892 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1893 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1894 1895 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1896 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1897 /* insert cols for primal dofs */ 1898 for (j=0;j<primal_dofs;j++) { 1899 start_vals = &qr_basis[j*size_of_constraint]; 1900 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1901 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1902 } 1903 /* insert cols for dual dofs */ 1904 for (j=0,k=0;j<dual_dofs;k++) { 1905 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1906 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1907 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1908 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1909 j++; 1910 } 1911 } 1912 1913 /* check change of basis */ 1914 if (pcbddc->dbg_flag) { 1915 PetscInt ii,jj; 1916 PetscBool valid_qr=PETSC_TRUE; 1917 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1918 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1919 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1920 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1921 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1922 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1923 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1924 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1925 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1926 for (jj=0;jj<size_of_constraint;jj++) { 1927 for (ii=0;ii<primal_dofs;ii++) { 1928 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1929 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1930 } 1931 } 1932 if (!valid_qr) { 1933 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1934 for (jj=0;jj<size_of_constraint;jj++) { 1935 for (ii=0;ii<primal_dofs;ii++) { 1936 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1937 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1938 } 1939 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1940 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1941 } 1942 } 1943 } 1944 } else { 1945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1946 } 1947 } 1948 } else { /* simple transformation block */ 1949 PetscInt row,col; 1950 PetscScalar val; 1951 for (j=0;j<size_of_constraint;j++) { 1952 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1953 if (!PetscBTLookup(is_primal,row)) { 1954 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1955 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1956 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1957 } else { 1958 for (k=0;k<size_of_constraint;k++) { 1959 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1960 if (row != col) { 1961 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1962 } else { 1963 val = 1.0; 1964 } 1965 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1966 } 1967 } 1968 } 1969 } 1970 /* increment primal counter */ 1971 primal_counter += primal_dofs; 1972 } else { 1973 if (pcbddc->dbg_flag) { 1974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1975 } 1976 } 1977 /* increment constraint counter total_counts */ 1978 total_counts += primal_dofs; 1979 } 1980 1981 /* free workspace */ 1982 if (qr_needed) { 1983 if (pcbddc->dbg_flag) { 1984 ierr = PetscFree(work);CHKERRQ(ierr); 1985 } 1986 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1987 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1988 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1989 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1990 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1991 } 1992 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1993 /* assembling */ 1994 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1995 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1996 /* 1997 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1998 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1999 */ 2000 } 2001 2002 /* flush dbg viewer */ 2003 if (pcbddc->dbg_flag) { 2004 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2005 } 2006 2007 /* free workspace */ 2008 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2009 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2010 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2011 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2012 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2013 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2014 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2015 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2016 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2017 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 #undef __FUNCT__ 2022 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2023 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2024 { 2025 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2026 PC_IS *pcis = (PC_IS*)pc->data; 2027 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2028 PetscInt bs,ierr,i,vertex_size; 2029 PetscViewer viewer=pcbddc->dbg_viewer; 2030 2031 PetscFunctionBegin; 2032 /* Init local Graph struct */ 2033 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2034 2035 /* Check validity of the csr graph passed in by the user */ 2036 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2037 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2038 } 2039 2040 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2041 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2042 Mat mat_adj; 2043 const PetscInt *xadj,*adjncy; 2044 PetscBool flg_row=PETSC_TRUE; 2045 2046 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2047 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2048 if (!flg_row) { 2049 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2050 } 2051 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2052 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2053 if (!flg_row) { 2054 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2055 } 2056 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2057 } 2058 2059 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2060 vertex_size = 1; 2061 if (!pcbddc->user_provided_isfordofs) { 2062 if (!pcbddc->n_ISForDofs) { 2063 IS *custom_ISForDofs; 2064 2065 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2066 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2067 for (i=0;i<bs;i++) { 2068 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2069 } 2070 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2071 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2072 /* remove my references to IS objects */ 2073 for (i=0;i<bs;i++) { 2074 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2075 } 2076 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2077 } 2078 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2079 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2080 } 2081 2082 /* Setup of Graph */ 2083 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2084 2085 /* Graph's connected components analysis */ 2086 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2087 2088 /* print some info to stdout */ 2089 if (pcbddc->dbg_flag) { 2090 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2097 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2098 { 2099 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2100 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2101 PetscErrorCode ierr; 2102 2103 PetscFunctionBegin; 2104 n = 0; 2105 vertices = 0; 2106 if (pcbddc->ConstraintMatrix) { 2107 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2108 for (i=0;i<local_primal_size;i++) { 2109 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2110 if (size_of_constraint == 1) n++; 2111 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2112 } 2113 if (vertices_idx) { 2114 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2115 n = 0; 2116 for (i=0;i<local_primal_size;i++) { 2117 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2118 if (size_of_constraint == 1) { 2119 vertices[n++]=row_cmat_indices[0]; 2120 } 2121 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2122 } 2123 } 2124 } 2125 *n_vertices = n; 2126 if (vertices_idx) *vertices_idx = vertices; 2127 PetscFunctionReturn(0); 2128 } 2129 2130 #undef __FUNCT__ 2131 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2132 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2133 { 2134 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2135 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2136 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2137 PetscBT touched; 2138 PetscErrorCode ierr; 2139 2140 /* This function assumes that the number of local constraints per connected component 2141 is not greater than the number of nodes defined for the connected component 2142 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2143 PetscFunctionBegin; 2144 n = 0; 2145 constraints_index = 0; 2146 if (pcbddc->ConstraintMatrix) { 2147 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2148 max_size_of_constraint = 0; 2149 for (i=0;i<local_primal_size;i++) { 2150 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2151 if (size_of_constraint > 1) { 2152 n++; 2153 } 2154 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2155 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2156 } 2157 if (constraints_idx) { 2158 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2159 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2160 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2161 n = 0; 2162 for (i=0;i<local_primal_size;i++) { 2163 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2164 if (size_of_constraint > 1) { 2165 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2166 /* find first untouched local node */ 2167 j = 0; 2168 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2169 min_index = row_cmat_global_indices[j]; 2170 min_loc = j; 2171 /* search the minimum among nodes not yet touched on the connected component 2172 since there can be more than one constraint on a single cc */ 2173 for (j=1;j<size_of_constraint;j++) { 2174 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2175 min_index = row_cmat_global_indices[j]; 2176 min_loc = j; 2177 } 2178 } 2179 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2180 constraints_index[n++] = row_cmat_indices[min_loc]; 2181 } 2182 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2183 } 2184 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2185 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2186 } 2187 } 2188 *n_constraints = n; 2189 if (constraints_idx) *constraints_idx = constraints_index; 2190 PetscFunctionReturn(0); 2191 } 2192 2193 /* the next two functions has been adapted from pcis.c */ 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "PCBDDCApplySchur" 2196 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2197 { 2198 PetscErrorCode ierr; 2199 PC_IS *pcis = (PC_IS*)(pc->data); 2200 2201 PetscFunctionBegin; 2202 if (!vec2_B) { vec2_B = v; } 2203 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2204 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2205 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2206 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2207 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2208 PetscFunctionReturn(0); 2209 } 2210 2211 #undef __FUNCT__ 2212 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2213 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2214 { 2215 PetscErrorCode ierr; 2216 PC_IS *pcis = (PC_IS*)(pc->data); 2217 2218 PetscFunctionBegin; 2219 if (!vec2_B) { vec2_B = v; } 2220 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2221 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2222 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2223 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2224 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2225 PetscFunctionReturn(0); 2226 } 2227 2228 #undef __FUNCT__ 2229 #define __FUNCT__ "PCBDDCSubsetNumbering" 2230 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2231 { 2232 Vec local_vec,global_vec; 2233 IS seqis,paris; 2234 VecScatter scatter_ctx; 2235 PetscScalar *array; 2236 PetscInt *temp_global_dofs; 2237 PetscScalar globalsum; 2238 PetscInt i,j,s; 2239 PetscInt nlocals,first_index,old_index,max_local; 2240 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2241 PetscMPIInt *dof_sizes,*dof_displs; 2242 PetscBool first_found; 2243 PetscErrorCode ierr; 2244 2245 PetscFunctionBegin; 2246 /* mpi buffers */ 2247 MPI_Comm_size(comm,&size_prec_comm); 2248 MPI_Comm_rank(comm,&rank_prec_comm); 2249 j = ( !rank_prec_comm ? size_prec_comm : 0); 2250 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2251 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2252 /* get maximum size of subset */ 2253 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2254 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2255 max_local = 0; 2256 if (n_local_dofs) { 2257 max_local = temp_global_dofs[0]; 2258 for (i=1;i<n_local_dofs;i++) { 2259 if (max_local < temp_global_dofs[i] ) { 2260 max_local = temp_global_dofs[i]; 2261 } 2262 } 2263 } 2264 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2265 max_global++; 2266 max_local = 0; 2267 if (n_local_dofs) { 2268 max_local = local_dofs[0]; 2269 for (i=1;i<n_local_dofs;i++) { 2270 if (max_local < local_dofs[i] ) { 2271 max_local = local_dofs[i]; 2272 } 2273 } 2274 } 2275 max_local++; 2276 /* allocate workspace */ 2277 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2278 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2279 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2280 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2281 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2282 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2283 /* create scatter */ 2284 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2285 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2286 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2287 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2288 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2289 /* init array */ 2290 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2291 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2292 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2293 if (local_dofs_mult) { 2294 for (i=0;i<n_local_dofs;i++) { 2295 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2296 } 2297 } else { 2298 for (i=0;i<n_local_dofs;i++) { 2299 array[local_dofs[i]]=1.0; 2300 } 2301 } 2302 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2303 /* scatter into global vec and get total number of global dofs */ 2304 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2305 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2306 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2307 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2308 /* Fill global_vec with cumulative function for global numbering */ 2309 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2310 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2311 nlocals = 0; 2312 first_index = -1; 2313 first_found = PETSC_FALSE; 2314 for (i=0;i<s;i++) { 2315 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2316 first_found = PETSC_TRUE; 2317 first_index = i; 2318 } 2319 nlocals += (PetscInt)PetscRealPart(array[i]); 2320 } 2321 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2322 if (!rank_prec_comm) { 2323 dof_displs[0]=0; 2324 for (i=1;i<size_prec_comm;i++) { 2325 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2326 } 2327 } 2328 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2329 if (first_found) { 2330 array[first_index] += (PetscScalar)nlocals; 2331 old_index = first_index; 2332 for (i=first_index+1;i<s;i++) { 2333 if (PetscRealPart(array[i]) > 0.0) { 2334 array[i] += array[old_index]; 2335 old_index = i; 2336 } 2337 } 2338 } 2339 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2340 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2341 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2342 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2343 /* get global ordering of local dofs */ 2344 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2345 if (local_dofs_mult) { 2346 for (i=0;i<n_local_dofs;i++) { 2347 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2348 } 2349 } else { 2350 for (i=0;i<n_local_dofs;i++) { 2351 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2352 } 2353 } 2354 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2355 /* free workspace */ 2356 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2357 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2358 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2359 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2360 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2361 /* return pointer to global ordering of local dofs */ 2362 *global_numbering_subset = temp_global_dofs; 2363 PetscFunctionReturn(0); 2364 } 2365 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2368 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2369 { 2370 PetscInt i,j; 2371 PetscScalar *alphas; 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 /* this implements stabilized Gram-Schmidt */ 2376 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2377 for (i=0;i<n;i++) { 2378 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2379 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2380 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2381 } 2382 ierr = PetscFree(alphas);CHKERRQ(ierr); 2383 PetscFunctionReturn(0); 2384 } 2385 2386 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatConvert_IS_AIJ" 2389 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2390 { 2391 Mat new_mat; 2392 Mat_IS *matis = (Mat_IS*)(mat->data); 2393 /* info on mat */ 2394 PetscInt bs,rows,cols; 2395 PetscInt lrows,lcols; 2396 PetscInt local_rows,local_cols; 2397 PetscMPIInt nsubdomains; 2398 /* preallocation */ 2399 Vec vec_dnz,vec_onz; 2400 PetscScalar *my_dnz,*my_onz,*array; 2401 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2402 PetscInt index_row,index_col,owner; 2403 PetscInt *local_indices,*global_indices; 2404 /* work */ 2405 PetscInt i,j; 2406 PetscErrorCode ierr; 2407 2408 PetscFunctionBegin; 2409 /* CHECKS */ 2410 /* get info from mat */ 2411 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2412 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2413 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2414 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2415 2416 /* MAT_INITIAL_MATRIX */ 2417 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2418 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2419 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2420 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2421 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2422 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2423 2424 /* 2425 preallocation 2426 */ 2427 2428 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2429 /* 2430 Some vectors are needed to sum up properly on shared interface dofs. 2431 Note that preallocation is not exact, since it overestimates nonzeros 2432 */ 2433 /* 2434 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2435 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2436 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2437 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2438 */ 2439 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2440 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2441 /* All processes need to compute entire row ownership */ 2442 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2443 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2444 for (i=0;i<nsubdomains;i++) { 2445 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2446 row_ownership[j]=i; 2447 } 2448 } 2449 /* map indices of local mat to global values */ 2450 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2451 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2452 for (i=0;i<local_rows;i++) local_indices[i]=i; 2453 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2454 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2455 2456 /* 2457 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2458 then, they will be summed up properly. This way, preallocation is always sufficient 2459 */ 2460 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2461 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2462 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2463 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2464 for (i=0;i<local_rows;i++) { 2465 index_row = global_indices[i]; 2466 for (j=i;j<local_rows;j++) { 2467 owner = row_ownership[index_row]; 2468 index_col = global_indices[j]; 2469 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2470 my_dnz[i] += 1.0; 2471 } else { /* offdiag block */ 2472 my_onz[i] += 1.0; 2473 } 2474 /* same as before, interchanging rows and cols */ 2475 if (i != j) { 2476 owner = row_ownership[index_col]; 2477 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2478 my_dnz[j] += 1.0; 2479 } else { 2480 my_onz[j] += 1.0; 2481 } 2482 } 2483 } 2484 } 2485 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2486 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2487 if (local_rows) { /* multilevel guard */ 2488 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2489 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2490 } 2491 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2492 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2493 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2494 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2495 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2496 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2497 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2498 2499 /* set computed preallocation in dnz and onz */ 2500 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2501 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2502 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2503 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2504 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2505 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2506 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2507 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2508 2509 /* Resize preallocation if overestimated */ 2510 for (i=0;i<lrows;i++) { 2511 dnz[i] = PetscMin(dnz[i],lcols); 2512 onz[i] = PetscMin(onz[i],cols-lcols); 2513 } 2514 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2515 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2516 2517 /* 2518 Set values. Very Basic. 2519 */ 2520 for (i=0;i<local_rows;i++) { 2521 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2522 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2523 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2524 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2525 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2526 } 2527 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2528 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2529 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2530 2531 /* get back mat */ 2532 *M = new_mat; 2533 PetscFunctionReturn(0); 2534 } 2535 2536 #undef __FUNCT__ 2537 #define __FUNCT__ "MatISSubassemble_Private" 2538 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2539 { 2540 Mat subdomain_adj; 2541 IS new_ranks,ranks_send_to; 2542 MatPartitioning partitioner; 2543 Mat_IS *matis; 2544 PetscInt n_neighs,*neighs,*n_shared,**shared; 2545 PetscInt prank; 2546 PetscMPIInt size,rank,color; 2547 PetscInt *xadj,*adjncy,*oldranks; 2548 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2549 PetscInt i,j,n_subdomains,local_size,threshold=0; 2550 PetscErrorCode ierr; 2551 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2552 PetscSubcomm subcomm; 2553 2554 PetscFunctionBegin; 2555 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2556 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2557 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2558 2559 /* Get info on mapping */ 2560 matis = (Mat_IS*)(mat->data); 2561 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2562 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2563 2564 /* build local CSR graph of subdomains' connectivity */ 2565 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2566 xadj[0] = 0; 2567 xadj[1] = PetscMax(n_neighs-1,0); 2568 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2569 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2570 2571 if (threshold) { 2572 PetscInt* count,min_threshold; 2573 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2574 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2575 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2576 for (j=0;j<n_shared[i];j++) { 2577 count[shared[i][j]] += 1; 2578 } 2579 } 2580 /* adapt threshold since we dont want to lose significant connections */ 2581 min_threshold = n_neighs; 2582 for (i=1;i<n_neighs;i++) { 2583 for (j=0;j<n_shared[i];j++) { 2584 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2585 } 2586 } 2587 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2588 threshold = PetscMax(min_threshold+1,threshold); 2589 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2590 xadj[1] = 0; 2591 for (i=1;i<n_neighs;i++) { 2592 for (j=0;j<n_shared[i];j++) { 2593 if (count[shared[i][j]] < threshold) { 2594 adjncy[xadj[1]] = neighs[i]; 2595 adjncy_wgt[xadj[1]] = n_shared[i]; 2596 xadj[1]++; 2597 break; 2598 } 2599 } 2600 } 2601 ierr = PetscFree(count);CHKERRQ(ierr); 2602 } else { 2603 if (xadj[1]) { 2604 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2605 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2606 } 2607 } 2608 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2609 if (use_square) { 2610 for (i=0;i<xadj[1];i++) { 2611 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2612 } 2613 } 2614 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2615 2616 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2617 2618 /* 2619 Restrict work on active processes only. 2620 */ 2621 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2622 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2623 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2624 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2625 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2626 if (color) { 2627 ierr = PetscFree(xadj);CHKERRQ(ierr); 2628 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2629 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2630 } else { 2631 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2632 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2633 prank = rank; 2634 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2635 for (i=0;i<size;i++) { 2636 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2637 } 2638 for (i=0;i<xadj[1];i++) { 2639 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2640 } 2641 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2642 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2643 n_subdomains = (PetscInt)size; 2644 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2645 2646 /* Partition */ 2647 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2648 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2649 if (use_vwgt) { 2650 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2651 v_wgt[0] = local_size; 2652 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2653 } 2654 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2655 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2656 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2657 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2658 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2659 2660 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2661 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2662 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2663 /* clean up */ 2664 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2665 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2666 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2667 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2668 } 2669 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2670 2671 /* assemble parallel IS for sends */ 2672 i = 1; 2673 if (color) i=0; 2674 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2675 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2676 2677 /* get back IS */ 2678 *is_sends = ranks_send_to; 2679 PetscFunctionReturn(0); 2680 } 2681 2682 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2683 2684 #undef __FUNCT__ 2685 #define __FUNCT__ "MatISSubassemble" 2686 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2687 { 2688 Mat local_mat,new_mat; 2689 Mat_IS *matis; 2690 IS is_sends_internal; 2691 PetscInt rows,cols; 2692 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2693 PetscBool ismatis,isdense; 2694 ISLocalToGlobalMapping l2gmap; 2695 PetscInt* l2gmap_indices; 2696 const PetscInt* is_indices; 2697 MatType new_local_type; 2698 MatTypePrivate new_local_type_private; 2699 /* buffers */ 2700 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2701 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2702 /* MPI */ 2703 MPI_Comm comm; 2704 PetscMPIInt n_sends,n_recvs,commsize; 2705 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2706 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2707 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2708 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2709 PetscErrorCode ierr; 2710 2711 PetscFunctionBegin; 2712 /* checks */ 2713 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2714 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2715 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2716 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2717 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2718 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2719 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2720 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2721 PetscValidLogicalCollectiveInt(mat,bs,0); 2722 /* prepare IS for sending if not provided */ 2723 if (!is_sends) { 2724 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2725 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2726 } else { 2727 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2728 is_sends_internal = is_sends; 2729 } 2730 2731 /* get pointer of MATIS data */ 2732 matis = (Mat_IS*)mat->data; 2733 2734 /* get comm */ 2735 comm = PetscObjectComm((PetscObject)mat); 2736 2737 /* compute number of sends */ 2738 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2739 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2740 2741 /* compute number of receives */ 2742 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2743 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2744 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2745 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2746 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2747 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2748 ierr = PetscFree(iflags);CHKERRQ(ierr); 2749 2750 /* prepare send/receive buffers */ 2751 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2752 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2753 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2754 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2755 2756 /* Get data from local mat */ 2757 if (!isdense) { 2758 /* TODO: See below some guidelines on how to prepare the local buffers */ 2759 /* 2760 send_buffer_vals should contain the raw values of the local matrix 2761 send_buffer_idxs should contain: 2762 - MatType_PRIVATE type 2763 - PetscInt size_of_l2gmap 2764 - PetscInt global_row_indices[size_of_l2gmap] 2765 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2766 */ 2767 } else { 2768 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2769 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2770 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2771 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2772 send_buffer_idxs[1] = i; 2773 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2774 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2775 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2776 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2777 for (i=0;i<n_sends;i++) { 2778 ilengths_vals[is_indices[i]] = len*len; 2779 ilengths_idxs[is_indices[i]] = len+2; 2780 } 2781 } 2782 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2783 buf_size_idxs = 0; 2784 buf_size_vals = 0; 2785 for (i=0;i<n_recvs;i++) { 2786 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2787 buf_size_vals += (PetscInt)olengths_vals[i]; 2788 } 2789 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2790 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2791 2792 /* get new tags for clean communications */ 2793 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2794 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2795 2796 /* allocate for requests */ 2797 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2798 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2799 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2800 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2801 2802 /* communications */ 2803 ptr_idxs = recv_buffer_idxs; 2804 ptr_vals = recv_buffer_vals; 2805 for (i=0;i<n_recvs;i++) { 2806 source_dest = onodes[i]; 2807 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2808 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2809 ptr_idxs += olengths_idxs[i]; 2810 ptr_vals += olengths_vals[i]; 2811 } 2812 for (i=0;i<n_sends;i++) { 2813 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2814 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2815 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2816 } 2817 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2818 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2819 2820 /* assemble new l2g map */ 2821 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2822 ptr_idxs = recv_buffer_idxs; 2823 buf_size_idxs = 0; 2824 for (i=0;i<n_recvs;i++) { 2825 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2826 ptr_idxs += olengths_idxs[i]; 2827 } 2828 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2829 ptr_idxs = recv_buffer_idxs; 2830 buf_size_idxs = 0; 2831 for (i=0;i<n_recvs;i++) { 2832 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2833 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2834 ptr_idxs += olengths_idxs[i]; 2835 } 2836 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2837 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2838 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2839 2840 /* infer new local matrix type from received local matrices type */ 2841 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2842 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2843 new_local_type_private = MATAIJ_PRIVATE; 2844 new_local_type = MATSEQAIJ; 2845 if (n_recvs) { 2846 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2847 ptr_idxs = recv_buffer_idxs; 2848 for (i=0;i<n_recvs;i++) { 2849 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2850 new_local_type_private = MATAIJ_PRIVATE; 2851 break; 2852 } 2853 ptr_idxs += olengths_idxs[i]; 2854 } 2855 switch (new_local_type_private) { 2856 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2857 new_local_type = MATSEQAIJ; 2858 bs = 1; 2859 break; 2860 case MATAIJ_PRIVATE: 2861 new_local_type = MATSEQAIJ; 2862 bs = 1; 2863 break; 2864 case MATBAIJ_PRIVATE: 2865 new_local_type = MATSEQBAIJ; 2866 break; 2867 case MATSBAIJ_PRIVATE: 2868 new_local_type = MATSEQSBAIJ; 2869 break; 2870 default: 2871 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2872 break; 2873 } 2874 } 2875 2876 /* create MATIS object */ 2877 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2878 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2879 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2880 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2881 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2882 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2883 2884 /* set values */ 2885 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2886 ptr_vals = recv_buffer_vals; 2887 ptr_idxs = recv_buffer_idxs; 2888 for (i=0;i<n_recvs;i++) { 2889 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2890 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2891 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2892 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2893 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2894 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2895 } 2896 ptr_idxs += olengths_idxs[i]; 2897 ptr_vals += olengths_vals[i]; 2898 } 2899 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2900 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2901 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2902 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2903 2904 { /* check */ 2905 Vec lvec,rvec; 2906 PetscReal infty_error; 2907 2908 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2909 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2910 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2911 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2912 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2913 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2914 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2915 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2916 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2917 } 2918 2919 /* free workspace */ 2920 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2921 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2922 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2923 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2924 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2925 if (isdense) { 2926 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2927 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2928 } else { 2929 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2930 } 2931 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2932 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2933 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2934 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2935 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2936 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2937 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2938 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2939 ierr = PetscFree(onodes);CHKERRQ(ierr); 2940 /* get back new mat */ 2941 *mat_n = new_mat; 2942 PetscFunctionReturn(0); 2943 } 2944 2945 #undef __FUNCT__ 2946 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2947 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2948 { 2949 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2950 PC_IS *pcis = (PC_IS*)pc->data; 2951 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2952 MatNullSpace CoarseNullSpace=NULL; 2953 ISLocalToGlobalMapping coarse_islg; 2954 IS coarse_is; 2955 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2956 PetscInt im_active=-1,active_procs=-1; 2957 PC pc_temp; 2958 PCType coarse_pc_type; 2959 KSPType coarse_ksp_type; 2960 PetscBool multilevel_requested,multilevel_allowed; 2961 PetscBool setsym,issym,isbddc,isnn; 2962 MatStructure matstruct; 2963 PetscErrorCode ierr; 2964 2965 PetscFunctionBegin; 2966 /* Assign global numbering to coarse dofs */ 2967 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2968 2969 /* infer some info from user */ 2970 issym = PETSC_FALSE; 2971 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2972 multilevel_allowed = PETSC_FALSE; 2973 multilevel_requested = PETSC_FALSE; 2974 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2975 if (multilevel_requested) { 2976 /* count "active processes" */ 2977 im_active = !!(pcis->n); 2978 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2979 if (active_procs/pcbddc->coarsening_ratio < 2) { 2980 multilevel_allowed = PETSC_FALSE; 2981 } else { 2982 multilevel_allowed = PETSC_TRUE; 2983 } 2984 } 2985 2986 /* set defaults for coarse KSP and PC */ 2987 if (multilevel_allowed) { 2988 if (issym) { 2989 coarse_ksp_type = KSPRICHARDSON; 2990 } else { 2991 coarse_ksp_type = KSPCHEBYSHEV; 2992 } 2993 coarse_pc_type = PCBDDC; 2994 } else { 2995 coarse_ksp_type = KSPPREONLY; 2996 coarse_pc_type = PCREDUNDANT; 2997 } 2998 2999 /* create the coarse KSP object only once with defaults */ 3000 if (!pcbddc->coarse_ksp) { 3001 char prefix[256],str_level[3]; 3002 size_t len; 3003 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3004 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3005 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3006 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3007 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3008 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3009 /* prefix */ 3010 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3011 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3012 if (!pcbddc->current_level) { 3013 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3014 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3015 } else { 3016 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3017 if (pcbddc->current_level>1) len -= 2; 3018 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3019 *(prefix+len)='\0'; 3020 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3021 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3022 } 3023 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3024 } 3025 /* allow user customization */ 3026 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3027 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3028 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3029 3030 /* get some info after set from options */ 3031 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3032 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3033 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3034 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3035 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3036 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3037 isbddc = PETSC_FALSE; 3038 } 3039 3040 /* propagate BDDC info to the next level */ 3041 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3042 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3043 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3044 3045 /* creates temporary MATIS object for coarse matrix */ 3046 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3047 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3048 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3049 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3050 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3051 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3052 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3053 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3054 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3055 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3056 3057 /* assemble coarse matrix */ 3058 if (isbddc || isnn) { 3059 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3060 } else { 3061 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3062 } 3063 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3064 3065 /* create local to global scatters for coarse problem */ 3066 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3067 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3068 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3069 3070 /* propagate symmetry info to coarse matrix */ 3071 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3072 3073 /* Compute coarse null space (special handling by BDDC only) */ 3074 if (pcbddc->NullSpace) { 3075 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3076 if (isbddc) { 3077 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3078 } else { 3079 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3080 } 3081 } 3082 3083 /* set operators */ 3084 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3085 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3086 3087 /* additional KSP customization */ 3088 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3089 if (max_it < 5) { 3090 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3091 } 3092 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3093 3094 3095 /* print some info if requested */ 3096 if (pcbddc->dbg_flag) { 3097 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3098 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3099 if (!multilevel_allowed) { 3100 if (multilevel_requested) { 3101 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3102 } else if (pcbddc->max_levels) { 3103 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3104 } 3105 } 3106 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3107 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3108 } 3109 3110 /* setup coarse ksp */ 3111 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3112 if (pcbddc->dbg_flag) { 3113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3114 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3115 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3116 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3117 } 3118 3119 /* Check coarse problem if requested */ 3120 if (pcbddc->dbg_flag) { 3121 KSP check_ksp; 3122 KSPType check_ksp_type; 3123 PC check_pc; 3124 Vec check_vec; 3125 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3126 PetscInt its; 3127 PetscBool ispreonly,compute; 3128 3129 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3130 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3131 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3132 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3133 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3134 if (ispreonly) { 3135 check_ksp_type = KSPPREONLY; 3136 compute = PETSC_FALSE; 3137 } else { 3138 if (issym) check_ksp_type = KSPCG; 3139 else check_ksp_type = KSPGMRES; 3140 compute = PETSC_TRUE; 3141 } 3142 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3143 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3144 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3145 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3146 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3147 /* create random vec */ 3148 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3149 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3150 if (CoarseNullSpace) { 3151 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3152 } 3153 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3154 /* solve coarse problem */ 3155 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3156 if (CoarseNullSpace) { 3157 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3158 } 3159 /* check coarse problem residual error */ 3160 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3161 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3162 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3163 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3164 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3166 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3168 /* get eigenvalue estimation if preonly has not been requested */ 3169 if (!ispreonly) { 3170 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3171 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3172 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3173 } 3174 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3175 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3176 } 3177 /* free memory */ 3178 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3179 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3180 PetscFunctionReturn(0); 3181 } 3182 3183 #undef __FUNCT__ 3184 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3185 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3186 { 3187 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3188 PC_IS* pcis = (PC_IS*)pc->data; 3189 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3190 PetscInt i,j,coarse_size; 3191 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3192 PetscErrorCode ierr; 3193 3194 PetscFunctionBegin; 3195 /* get indices in local ordering for vertices and constraints */ 3196 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3197 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3198 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3199 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3200 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3201 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3202 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3203 3204 /* Compute global number of coarse dofs */ 3205 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3206 3207 /* check numbering */ 3208 if (pcbddc->dbg_flag) { 3209 PetscScalar coarsesum,*array; 3210 3211 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3212 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3213 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3214 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3215 for (i=0;i<pcbddc->local_primal_size;i++) { 3216 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3217 } 3218 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3219 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3220 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3221 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3222 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3223 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3224 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3225 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3226 for (i=0;i<pcis->n;i++) { 3227 if (array[i] == 1.0) { 3228 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3229 } 3230 } 3231 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3232 for (i=0;i<pcis->n;i++) { 3233 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3234 } 3235 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3236 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3237 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3238 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3239 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3240 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3241 if (pcbddc->dbg_flag > 1) { 3242 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3243 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3245 for (i=0;i<pcbddc->local_primal_size;i++) { 3246 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3247 } 3248 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3249 } 3250 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3251 } 3252 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3253 /* get back data */ 3254 *coarse_size_n = coarse_size; 3255 *local_primal_indices_n = local_primal_indices; 3256 PetscFunctionReturn(0); 3257 } 3258 3259