1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 pcbddc->n_ISForDofs = 0; 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "PCBDDCCreateWorkVectors" 111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 112 { 113 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 114 PC_IS *pcis = (PC_IS*)pc->data; 115 VecType impVecType; 116 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 117 PetscErrorCode ierr; 118 119 PetscFunctionBegin; 120 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 121 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 122 local_primal_size = n_constraints+n_vertices; 123 n_R = pcis->n-n_vertices; 124 /* local work vectors */ 125 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 126 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 127 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 128 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 129 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 130 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 131 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 132 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 133 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 134 if (n_constraints) { 135 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 136 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 137 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetUpCorrection" 144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 145 { 146 PetscErrorCode ierr; 147 /* pointers to pcis and pcbddc */ 148 PC_IS* pcis = (PC_IS*)pc->data; 149 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 150 /* submatrices of local problem */ 151 Mat A_RV,A_VR,A_VV; 152 /* working matrices */ 153 Mat M1,M2,M3,C_CR; 154 /* working vectors */ 155 Vec vec1_C,vec2_C,vec1_V,vec2_V; 156 /* additional working stuff */ 157 IS is_aux; 158 ISLocalToGlobalMapping BtoNmap; 159 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 160 const PetscScalar *array,*row_cmat_values; 161 const PetscInt *row_cmat_indices,*idx_R_local; 162 PetscInt *vertices,*idx_V_B,*auxindices; 163 PetscInt n_vertices,n_constraints,size_of_constraint; 164 PetscInt i,j,n_R,n_D,n_B; 165 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 166 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 184 /* Allocating some extra storage just to be safe */ 185 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 186 for (i=0;i<pcis->n;i++) auxindices[i]=i; 187 188 /* vertices in boundary numbering */ 189 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 190 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 191 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 193 if (i != n_vertices) { 194 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 195 } 196 197 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 198 if (n_constraints) { 199 /* work vectors */ 200 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 201 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 202 /* auxiliary matrices */ 203 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 204 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 205 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 206 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 207 208 /* Extract constraints on R nodes: C_{CR} */ 209 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 210 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 211 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 212 213 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 214 for (i=0;i<n_constraints;i++) { 215 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 216 /* Get row of constraint matrix in R numbering */ 217 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 218 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 219 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 220 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 221 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 222 /* Solve for row of constraint matrix in R numbering */ 223 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 224 /* Set values in local_auxmat2 */ 225 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 226 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 227 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 228 } 229 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 230 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 231 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 232 233 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 234 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 235 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 236 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 237 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 238 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 239 ierr = MatSetUp(M1);CHKERRQ(ierr); 240 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 241 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 242 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 243 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 244 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 245 ierr = MatDestroy(&M2);CHKERRQ(ierr); 246 ierr = MatDestroy(&M3);CHKERRQ(ierr); 247 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 248 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 249 } 250 251 /* Get submatrices from subdomain matrix */ 252 if (n_vertices) { 253 PetscInt ibs,mbs; 254 PetscBool issbaij; 255 Mat newmat; 256 257 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 258 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 259 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 260 if (ibs != mbs) { /* need to convert to SEQAIJ */ 261 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 262 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 263 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 265 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 266 } else { 267 /* this is safe */ 268 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 269 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 270 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 271 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 272 /* which of the two approaches is faster? */ 273 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 274 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 275 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 276 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 277 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 278 } else { 279 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 280 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 281 } 282 } 283 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 284 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 285 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 286 } 287 288 /* Matrix of coarse basis functions (local) */ 289 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 290 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 291 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 292 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 293 if (pcbddc->switch_static || pcbddc->dbg_flag) { 294 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 295 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 296 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 297 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 298 } 299 300 if (pcbddc->dbg_flag) { 301 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 302 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 303 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 304 } 305 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 306 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 307 308 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 309 310 /* vertices */ 311 for (i=0;i<n_vertices;i++) { 312 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 313 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 314 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 315 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 316 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 317 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 318 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 319 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 320 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 321 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 322 if (n_constraints) { 323 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 324 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 325 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 326 } 327 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 328 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 329 330 /* Set values in coarse basis function and subdomain part of coarse_mat */ 331 /* coarse basis functions */ 332 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 333 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 334 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 335 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 336 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 337 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 338 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 339 if (pcbddc->switch_static || pcbddc->dbg_flag) { 340 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 341 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 342 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 343 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 344 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 345 } 346 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 347 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 348 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 if (n_constraints) { 351 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 352 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 } 355 356 /* check */ 357 if (pcbddc->dbg_flag) { 358 /* assemble subdomain vector on local nodes */ 359 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 360 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 361 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 362 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 363 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 364 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 365 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 366 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 367 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 368 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 369 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 370 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 371 if (n_constraints) { 372 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 373 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 374 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 375 } 376 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 378 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 379 /* check saddle point solution */ 380 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 381 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 382 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 383 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 384 /* shift by the identity matrix */ 385 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 386 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 387 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 388 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 389 } 390 } 391 392 /* constraints */ 393 for (i=0;i<n_constraints;i++) { 394 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 395 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 396 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 397 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 398 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 399 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 400 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 401 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 402 if (n_vertices) { 403 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 404 } 405 /* Set values in coarse basis function and subdomain part of coarse_mat */ 406 /* coarse basis functions */ 407 j = i+n_vertices; /* don't touch this! */ 408 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 409 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 410 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 411 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 412 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 414 if (pcbddc->switch_static || pcbddc->dbg_flag) { 415 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 416 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 417 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 418 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 420 } 421 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 422 if (n_vertices) { 423 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 424 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 426 } 427 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 428 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 430 431 if (pcbddc->dbg_flag) { 432 /* assemble subdomain vector on nodes */ 433 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 434 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 437 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 439 /* assemble subdomain vector of lagrange multipliers */ 440 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 441 if (n_vertices) { 442 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 443 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 444 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 445 } 446 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 447 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 448 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 449 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 452 /* check saddle point solution */ 453 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 454 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 456 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 457 /* shift by the identity matrix */ 458 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 459 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 460 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 461 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 462 } 463 } 464 /* call assembling routines for local coarse basis */ 465 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 if (pcbddc->switch_static || pcbddc->dbg_flag) { 468 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 469 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 470 } 471 472 /* compute other basis functions for non-symmetric problems */ 473 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 474 if (!setsym || (setsym && !issym)) { 475 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 476 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 477 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 478 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 481 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 482 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 483 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 484 } 485 for (i=0;i<pcbddc->local_primal_size;i++) { 486 if (n_constraints) { 487 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 488 for (j=0;j<n_constraints;j++) { 489 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 490 } 491 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 492 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 493 } 494 if (i<n_vertices) { 495 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 496 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 499 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 500 if (n_constraints) { 501 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 502 } 503 } else { 504 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 505 } 506 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 507 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 508 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 509 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 510 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 511 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 512 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 513 if (i<n_vertices) { 514 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 515 } 516 if (pcbddc->switch_static || pcbddc->dbg_flag) { 517 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 518 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 519 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 520 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 521 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 522 } 523 524 if (pcbddc->dbg_flag) { 525 /* assemble subdomain vector on nodes */ 526 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 527 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 528 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 529 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 530 if (i<n_vertices) { 531 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 532 } 533 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 534 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 535 /* assemble subdomain vector of lagrange multipliers */ 536 for (j=0;j<pcbddc->local_primal_size;j++) { 537 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 538 } 539 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 540 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 541 /* check saddle point solution */ 542 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 543 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 544 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 545 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 546 /* shift by the identity matrix */ 547 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 548 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 549 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 550 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 551 } 552 } 553 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 if (pcbddc->switch_static || pcbddc->dbg_flag) { 556 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 } 559 unsymmetric_check = PETSC_TRUE; 560 } else { /* take references to already computed coarse basis */ 561 unsymmetric_check = PETSC_FALSE; 562 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 563 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 564 if (pcbddc->coarse_phi_D) { 565 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 566 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 567 } 568 } 569 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 570 /* Checking coarse_sub_mat and coarse basis functios */ 571 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 572 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 573 if (pcbddc->dbg_flag) { 574 Mat coarse_sub_mat; 575 Mat AUXMAT,TM1,TM2,TM3,TM4; 576 Mat coarse_phi_D,coarse_phi_B; 577 Mat coarse_psi_D,coarse_psi_B; 578 Mat A_II,A_BB,A_IB,A_BI; 579 MatType checkmattype=MATSEQAIJ; 580 PetscReal real_value; 581 582 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 583 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 584 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 585 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 586 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 587 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 588 if (unsymmetric_check) { 589 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 590 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 591 } 592 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 593 594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 596 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 597 if (unsymmetric_check) { 598 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 599 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 600 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 601 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 602 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 603 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 604 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 605 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 607 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 608 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 609 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 610 } else { 611 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 612 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 613 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 614 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 615 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 616 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 617 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 618 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 619 } 620 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 621 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 622 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 623 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 624 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 625 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 630 for (i=0;i<pcbddc->local_primal_size;i++) { 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 632 } 633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 634 for (i=0;i<pcbddc->local_primal_size;i++) { 635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 636 } 637 if (unsymmetric_check) { 638 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 639 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 641 } 642 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 643 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 644 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 645 } 646 } 647 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 648 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 649 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 650 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 651 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 652 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 653 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 654 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 655 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 656 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 657 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 658 if (unsymmetric_check) { 659 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 660 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 661 } 662 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 663 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 664 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 665 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 666 } 667 /* free memory */ 668 if (n_vertices) { 669 ierr = PetscFree(vertices);CHKERRQ(ierr); 670 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 671 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 672 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 673 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 674 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 675 } 676 if (n_constraints) { 677 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 678 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 679 ierr = MatDestroy(&M1);CHKERRQ(ierr); 680 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 681 } 682 ierr = PetscFree(auxindices);CHKERRQ(ierr); 683 /* get back data */ 684 *coarse_submat_vals_n = coarse_submat_vals; 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 690 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 691 { 692 PC_IS* pcis = (PC_IS*)(pc->data); 693 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 694 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 695 PetscBool issbaij,isbaij; 696 /* manage repeated solves */ 697 MatReuse reuse; 698 MatStructure matstruct; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 /* get mat flags */ 703 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 704 reuse = MAT_INITIAL_MATRIX; 705 if (pc->setupcalled) { 706 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 707 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 708 if (matstruct == SAME_NONZERO_PATTERN) { 709 reuse = MAT_REUSE_MATRIX; 710 } else { 711 reuse = MAT_INITIAL_MATRIX; 712 } 713 } 714 if (reuse == MAT_INITIAL_MATRIX) { 715 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 717 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 720 } 721 722 /* transform local matrices if needed */ 723 if (pcbddc->use_change_of_basis) { 724 Mat change_mat_all; 725 PetscScalar *row_cmat_values; 726 PetscInt *row_cmat_indices; 727 PetscInt *nnz,*is_indices,*temp_indices; 728 PetscInt i,j,k,n_D,n_B; 729 730 /* Get Non-overlapping dimensions */ 731 n_B = pcis->n_B; 732 n_D = pcis->n-n_B; 733 734 /* compute nonzero structure of change of basis on all local nodes */ 735 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 736 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 737 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 738 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 739 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 740 k=1; 741 for (i=0;i<n_B;i++) { 742 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 743 nnz[is_indices[i]]=j; 744 if (k < j) k = j; 745 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 746 } 747 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 748 /* assemble change of basis matrix on the whole set of local dofs */ 749 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 750 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 751 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 752 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 753 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 754 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 755 for (i=0;i<n_D;i++) { 756 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 757 } 758 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 759 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 760 for (i=0;i<n_B;i++) { 761 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 762 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 763 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 764 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 765 } 766 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 768 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 769 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 770 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 771 if (!issbaij && !isbaij) { 772 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 773 } else { 774 Mat work_mat; 775 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 776 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 777 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 778 } 779 /* 780 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 781 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 782 */ 783 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 784 ierr = PetscFree(nnz);CHKERRQ(ierr); 785 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 786 } else { 787 /* without change of basis, the local matrix is unchanged */ 788 if (!pcbddc->local_mat) { 789 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 790 pcbddc->local_mat = matis->A; 791 } 792 } 793 794 /* get submatrices */ 795 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 796 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 797 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 798 if (!issbaij) { 799 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 800 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 801 } else { 802 Mat newmat; 803 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 804 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 805 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 806 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 807 } 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 813 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 814 { 815 PC_IS* pcis = (PC_IS*)(pc->data); 816 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 817 IS is_aux1,is_aux2; 818 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 819 PetscInt n_vertices,i,j,n_R,n_D,n_B; 820 PetscInt vbs,bs; 821 PetscBT bitmask; 822 PetscErrorCode ierr; 823 824 PetscFunctionBegin; 825 /* Set Non-overlapping dimensions */ 826 n_B = pcis->n_B; n_D = pcis->n - n_B; 827 /* get vertex indices from constraint matrix */ 828 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 829 830 /* create auxiliary bitmask */ 831 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 832 for (i=0;i<n_vertices;i++) { 833 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 834 } 835 836 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 837 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 838 for (i=0, n_R=0; i<pcis->n; i++) { 839 if (!PetscBTLookup(bitmask,i)) { 840 idx_R_local[n_R] = i; 841 n_R++; 842 } 843 } 844 845 /* Block code */ 846 vbs = 1; 847 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 848 if (bs>1 && !(n_vertices%bs)) { 849 PetscBool is_blocked = PETSC_TRUE; 850 PetscInt *vary; 851 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 852 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 853 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 854 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 855 for (i=0; i<n_vertices; i++) { 856 if (vary[i]!=0 && vary[i]!=bs) { 857 is_blocked = PETSC_FALSE; 858 break; 859 } 860 } 861 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 862 vbs = bs; 863 for (i=0;i<n_R/vbs;i++) { 864 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 865 } 866 } 867 ierr = PetscFree(vary);CHKERRQ(ierr); 868 } 869 ierr = PetscFree(vertices);CHKERRQ(ierr); 870 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 871 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 872 873 /* print some info if requested */ 874 if (pcbddc->dbg_flag) { 875 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 876 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 878 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 880 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 881 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 882 } 883 884 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 885 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 886 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 887 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 888 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 889 for (i=0; i<n_D; i++) { 890 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 891 } 892 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 893 for (i=0, j=0; i<n_R; i++) { 894 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 895 aux_array1[j++] = i; 896 } 897 } 898 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 899 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 900 for (i=0, j=0; i<n_B; i++) { 901 if (!PetscBTLookup(bitmask,is_indices[i])) { 902 aux_array2[j++] = i; 903 } 904 } 905 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 906 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 907 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 908 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 909 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 910 911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 912 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 913 for (i=0, j=0; i<n_R; i++) { 914 if (PetscBTLookup(bitmask,idx_R_local[i])) { 915 aux_array1[j++] = i; 916 } 917 } 918 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 919 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 920 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 921 } 922 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 923 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } 926 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 930 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 931 { 932 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 933 PC_IS *pcis = (PC_IS*)pc->data; 934 PC pc_temp; 935 Mat A_RR; 936 Vec vec1,vec2,vec3; 937 MatStructure matstruct; 938 PetscScalar m_one = -1.0; 939 PetscReal value; 940 PetscInt n_D,n_R,ibs,mbs; 941 PetscBool use_exact,use_exact_reduced,issbaij; 942 PetscErrorCode ierr; 943 /* prefixes stuff */ 944 char dir_prefix[256],neu_prefix[256],str_level[3]; 945 size_t len; 946 947 PetscFunctionBegin; 948 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 949 950 /* compute prefixes */ 951 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 952 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 953 if (!pcbddc->current_level) { 954 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 955 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 956 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 957 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 958 } else { 959 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 960 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 961 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 962 len -= 15; /* remove "pc_bddc_coarse_" */ 963 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 964 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 965 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 966 *(dir_prefix+len)='\0'; 967 *(neu_prefix+len)='\0'; 968 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 969 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 970 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 971 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 972 } 973 974 /* DIRICHLET PROBLEM */ 975 /* Matrix for Dirichlet problem is pcis->A_II */ 976 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 977 if (!pcbddc->ksp_D) { /* create object if not yet build */ 978 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 979 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 980 /* default */ 981 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 982 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 983 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 984 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 985 if (issbaij) { 986 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 987 } else { 988 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 989 } 990 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 991 } 992 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 993 /* Allow user's customization */ 994 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 995 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 996 if (!n_D) { 997 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 998 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 999 } 1000 /* Set Up KSP for Dirichlet problem of BDDC */ 1001 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1002 /* set ksp_D into pcis data */ 1003 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1004 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1005 pcis->ksp_D = pcbddc->ksp_D; 1006 1007 /* NEUMANN PROBLEM */ 1008 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1009 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1010 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1011 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1012 if (ibs != mbs) { 1013 Mat newmat; 1014 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1015 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1016 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1017 } else { 1018 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1019 } 1020 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1021 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1022 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1023 /* default */ 1024 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1025 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1026 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1027 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1028 if (issbaij) { 1029 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1030 } else { 1031 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1032 } 1033 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1034 } 1035 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1036 /* Allow user's customization */ 1037 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1038 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1039 if (!n_R) { 1040 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1041 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1042 } 1043 /* Set Up KSP for Neumann problem of BDDC */ 1044 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1045 1046 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1047 1048 /* Dirichlet */ 1049 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1050 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1051 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1052 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1053 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1054 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1055 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1056 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1057 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1058 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1059 /* need to be adapted? */ 1060 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1061 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1062 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1063 /* print info */ 1064 if (pcbddc->dbg_flag) { 1065 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1070 } 1071 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1072 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1073 } 1074 1075 /* Neumann */ 1076 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1077 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1078 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1079 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1080 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1081 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1082 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1083 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1084 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1085 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1086 /* need to be adapted? */ 1087 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1088 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1089 /* print info */ 1090 if (pcbddc->dbg_flag) { 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1093 } 1094 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1095 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1096 } 1097 /* free Neumann problem's matrix */ 1098 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1104 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc) 1105 { 1106 PetscErrorCode ierr; 1107 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1108 1109 PetscFunctionBegin; 1110 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1111 if (pcbddc->local_auxmat1) { 1112 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1113 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1120 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1121 { 1122 PetscErrorCode ierr; 1123 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1124 PC_IS* pcis = (PC_IS*) (pc->data); 1125 const PetscScalar zero = 0.0; 1126 1127 PetscFunctionBegin; 1128 /* Application of PSI^T */ 1129 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1130 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1131 /* Scatter data of coarse_rhs */ 1132 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1133 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1134 1135 /* Local solution on R nodes */ 1136 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1137 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1139 if (pcbddc->switch_static) { 1140 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1141 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1142 } 1143 ierr = PCBDDCSolveSubstructureCorrection(pc);CHKERRQ(ierr); 1144 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1145 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1146 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1147 if (pcbddc->switch_static) { 1148 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1149 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1150 } 1151 1152 /* Coarse solution */ 1153 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1154 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1155 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1156 } 1157 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 1160 /* Sum contributions from two levels */ 1161 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1162 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1163 PetscFunctionReturn(0); 1164 } 1165 1166 #undef __FUNCT__ 1167 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1168 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1169 { 1170 PetscErrorCode ierr; 1171 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1172 1173 PetscFunctionBegin; 1174 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1180 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1181 { 1182 PetscErrorCode ierr; 1183 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1184 1185 PetscFunctionBegin; 1186 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 /* uncomment for testing purposes */ 1191 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1194 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1195 { 1196 PetscErrorCode ierr; 1197 PC_IS* pcis = (PC_IS*)(pc->data); 1198 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1199 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1200 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1201 MatType impMatType=MATSEQAIJ; 1202 /* one and zero */ 1203 PetscScalar one=1.0,zero=0.0; 1204 /* space to store constraints and their local indices */ 1205 PetscScalar *temp_quadrature_constraint; 1206 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1207 /* iterators */ 1208 PetscInt i,j,k,total_counts,temp_start_ptr; 1209 /* stuff to store connected components stored in pcbddc->mat_graph */ 1210 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1211 PetscInt n_ISForFaces,n_ISForEdges; 1212 /* near null space stuff */ 1213 MatNullSpace nearnullsp; 1214 const Vec *nearnullvecs; 1215 Vec *localnearnullsp; 1216 PetscBool nnsp_has_cnst; 1217 PetscInt nnsp_size; 1218 PetscScalar *array; 1219 /* BLAS integers */ 1220 PetscBLASInt lwork,lierr; 1221 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1222 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1223 /* LAPACK working arrays for SVD or POD */ 1224 PetscBool skip_lapack; 1225 PetscScalar *work; 1226 PetscReal *singular_vals; 1227 #if defined(PETSC_USE_COMPLEX) 1228 PetscReal *rwork; 1229 #endif 1230 #if defined(PETSC_MISSING_LAPACK_GESVD) 1231 PetscBLASInt Blas_one_2=1; 1232 PetscScalar *temp_basis,*correlation_mat; 1233 #else 1234 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1235 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1236 #endif 1237 /* change of basis */ 1238 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1239 PetscBool boolforchange,qr_needed; 1240 PetscBT touched,change_basis,qr_needed_idx; 1241 /* auxiliary stuff */ 1242 PetscInt *nnz,*is_indices,*local_to_B; 1243 /* some quantities */ 1244 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1245 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1246 1247 1248 PetscFunctionBegin; 1249 /* Get index sets for faces, edges and vertices from graph */ 1250 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1251 pcbddc->use_vertices = PETSC_TRUE; 1252 } 1253 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1254 /* HACK: provide functions to set change of basis */ 1255 if (!ISForVertices && pcbddc->NullSpace) { 1256 pcbddc->use_change_of_basis = PETSC_TRUE; 1257 pcbddc->use_change_on_faces = PETSC_FALSE; 1258 } 1259 /* print some info */ 1260 if (pcbddc->dbg_flag) { 1261 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1262 i = 0; 1263 if (ISForVertices) { 1264 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1265 } 1266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1267 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1268 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1269 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1270 } 1271 /* check if near null space is attached to global mat */ 1272 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1273 if (nearnullsp) { 1274 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1275 } else { /* if near null space is not provided BDDC uses constants by default */ 1276 nnsp_size = 0; 1277 nnsp_has_cnst = PETSC_TRUE; 1278 } 1279 /* get max number of constraints on a single cc */ 1280 max_constraints = nnsp_size; 1281 if (nnsp_has_cnst) max_constraints++; 1282 1283 /* 1284 Evaluate maximum storage size needed by the procedure 1285 - temp_indices will contain start index of each constraint stored as follows 1286 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1287 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1288 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1289 */ 1290 total_counts = n_ISForFaces+n_ISForEdges; 1291 total_counts *= max_constraints; 1292 n_vertices = 0; 1293 if (ISForVertices) { 1294 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1295 } 1296 total_counts += n_vertices; 1297 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1298 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1299 total_counts = 0; 1300 max_size_of_constraint = 0; 1301 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1302 if (i<n_ISForEdges) { 1303 used_IS = &ISForEdges[i]; 1304 } else { 1305 used_IS = &ISForFaces[i-n_ISForEdges]; 1306 } 1307 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1308 total_counts += j; 1309 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1310 } 1311 total_counts *= max_constraints; 1312 total_counts += n_vertices; 1313 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1314 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1315 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1316 /* local to boundary numbering */ 1317 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1318 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1319 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1320 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1321 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1322 /* get local part of global near null space vectors */ 1323 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1324 for (k=0;k<nnsp_size;k++) { 1325 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1326 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1327 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1328 } 1329 1330 /* whether or not to skip lapack calls */ 1331 skip_lapack = PETSC_TRUE; 1332 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1333 1334 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1335 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1336 PetscScalar temp_work; 1337 #if defined(PETSC_MISSING_LAPACK_GESVD) 1338 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1339 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1340 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1341 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1342 #if defined(PETSC_USE_COMPLEX) 1343 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1344 #endif 1345 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1346 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1347 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1348 lwork = -1; 1349 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1350 #if !defined(PETSC_USE_COMPLEX) 1351 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1352 #else 1353 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1354 #endif 1355 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1356 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1357 #else /* on missing GESVD */ 1358 /* SVD */ 1359 PetscInt max_n,min_n; 1360 max_n = max_size_of_constraint; 1361 min_n = max_constraints; 1362 if (max_size_of_constraint < max_constraints) { 1363 min_n = max_size_of_constraint; 1364 max_n = max_constraints; 1365 } 1366 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1367 #if defined(PETSC_USE_COMPLEX) 1368 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1369 #endif 1370 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1371 lwork = -1; 1372 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1373 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1374 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1375 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1376 #if !defined(PETSC_USE_COMPLEX) 1377 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1378 #else 1379 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1380 #endif 1381 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1382 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1383 #endif /* on missing GESVD */ 1384 /* Allocate optimal workspace */ 1385 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1386 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1387 } 1388 /* Now we can loop on constraining sets */ 1389 total_counts = 0; 1390 temp_indices[0] = 0; 1391 /* vertices */ 1392 if (ISForVertices) { 1393 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1394 if (nnsp_has_cnst) { /* consider all vertices */ 1395 for (i=0;i<n_vertices;i++) { 1396 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1397 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1398 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1399 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1400 total_counts++; 1401 } 1402 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1403 PetscBool used_vertex; 1404 for (i=0;i<n_vertices;i++) { 1405 used_vertex = PETSC_FALSE; 1406 k = 0; 1407 while (!used_vertex && k<nnsp_size) { 1408 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1409 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1410 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1411 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1412 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1413 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1414 total_counts++; 1415 used_vertex = PETSC_TRUE; 1416 } 1417 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1418 k++; 1419 } 1420 } 1421 } 1422 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1423 n_vertices = total_counts; 1424 } 1425 1426 /* edges and faces */ 1427 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1428 if (i<n_ISForEdges) { 1429 used_IS = &ISForEdges[i]; 1430 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1431 } else { 1432 used_IS = &ISForFaces[i-n_ISForEdges]; 1433 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1434 } 1435 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1436 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1437 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1438 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1439 /* change of basis should not be performed on local periodic nodes */ 1440 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1441 if (nnsp_has_cnst) { 1442 PetscScalar quad_value; 1443 temp_constraints++; 1444 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1445 for (j=0;j<size_of_constraint;j++) { 1446 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1447 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1448 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1449 } 1450 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1451 total_counts++; 1452 } 1453 for (k=0;k<nnsp_size;k++) { 1454 PetscReal real_value; 1455 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1456 for (j=0;j<size_of_constraint;j++) { 1457 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1458 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1459 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1460 } 1461 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1462 /* check if array is null on the connected component */ 1463 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1464 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1465 if (real_value > 0.0) { /* keep indices and values */ 1466 temp_constraints++; 1467 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1468 total_counts++; 1469 } 1470 } 1471 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1472 valid_constraints = temp_constraints; 1473 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1474 if (!pcbddc->use_nnsp_true && temp_constraints) { 1475 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1476 1477 #if defined(PETSC_MISSING_LAPACK_GESVD) 1478 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1479 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1480 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1481 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1482 from that computed using LAPACKgesvd 1483 -> This is due to a different computation of eigenvectors in LAPACKheev 1484 -> The quality of the POD-computed basis will be the same */ 1485 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1486 /* Store upper triangular part of correlation matrix */ 1487 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1488 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1489 for (j=0;j<temp_constraints;j++) { 1490 for (k=0;k<j+1;k++) { 1491 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1492 } 1493 } 1494 /* compute eigenvalues and eigenvectors of correlation matrix */ 1495 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1496 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1497 #if !defined(PETSC_USE_COMPLEX) 1498 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1499 #else 1500 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1501 #endif 1502 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1503 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1504 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1505 j=0; 1506 while (j < temp_constraints && singular_vals[j] < tol) j++; 1507 total_counts=total_counts-j; 1508 valid_constraints = temp_constraints-j; 1509 /* scale and copy POD basis into used quadrature memory */ 1510 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1511 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1512 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1514 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1515 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1516 if (j<temp_constraints) { 1517 PetscInt ii; 1518 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1519 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1520 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1521 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1522 for (k=0;k<temp_constraints-j;k++) { 1523 for (ii=0;ii<size_of_constraint;ii++) { 1524 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1525 } 1526 } 1527 } 1528 #else /* on missing GESVD */ 1529 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1530 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1531 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1532 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1533 #if !defined(PETSC_USE_COMPLEX) 1534 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1535 #else 1536 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1537 #endif 1538 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1539 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1540 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1541 k = temp_constraints; 1542 if (k > size_of_constraint) k = size_of_constraint; 1543 j = 0; 1544 while (j < k && singular_vals[k-j-1] < tol) j++; 1545 total_counts = total_counts-temp_constraints+k-j; 1546 valid_constraints = k-j; 1547 #endif /* on missing GESVD */ 1548 } 1549 /* setting change_of_basis flag is safe now */ 1550 if (boolforchange) { 1551 for (j=0;j<valid_constraints;j++) { 1552 PetscBTSet(change_basis,total_counts-j-1); 1553 } 1554 } 1555 } 1556 /* free index sets of faces, edges and vertices */ 1557 for (i=0;i<n_ISForFaces;i++) { 1558 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1559 } 1560 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1561 for (i=0;i<n_ISForEdges;i++) { 1562 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1563 } 1564 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1565 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1566 1567 /* free workspace */ 1568 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1569 ierr = PetscFree(work);CHKERRQ(ierr); 1570 #if defined(PETSC_USE_COMPLEX) 1571 ierr = PetscFree(rwork);CHKERRQ(ierr); 1572 #endif 1573 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1574 #if defined(PETSC_MISSING_LAPACK_GESVD) 1575 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1576 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1577 #endif 1578 } 1579 for (k=0;k<nnsp_size;k++) { 1580 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1581 } 1582 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1583 1584 /* set quantities in pcbddc data structure */ 1585 /* n_vertices defines the number of subdomain corners in the primal space */ 1586 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1587 pcbddc->local_primal_size = total_counts; 1588 pcbddc->n_vertices = n_vertices; 1589 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1590 1591 /* Create constraint matrix */ 1592 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1593 /* so we need to set it up properly either with or without change of basis */ 1594 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1595 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1596 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1597 /* array to compute a local numbering of constraints : vertices first then constraints */ 1598 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1599 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1600 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1601 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1602 /* auxiliary stuff for basis change */ 1603 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1604 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1605 1606 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1607 total_primal_vertices=0; 1608 for (i=0;i<pcbddc->local_primal_size;i++) { 1609 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1610 if (size_of_constraint == 1) { 1611 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1612 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1613 aux_primal_minloc[total_primal_vertices]=0; 1614 total_primal_vertices++; 1615 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1616 PetscInt min_loc,min_index; 1617 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1618 /* find first untouched local node */ 1619 k = 0; 1620 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1621 min_index = global_indices[k]; 1622 min_loc = k; 1623 /* search the minimum among global nodes already untouched on the cc */ 1624 for (k=1;k<size_of_constraint;k++) { 1625 /* there can be more than one constraint on a single connected component */ 1626 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1627 min_index = global_indices[k]; 1628 min_loc = k; 1629 } 1630 } 1631 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1632 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1633 aux_primal_minloc[total_primal_vertices]=min_loc; 1634 total_primal_vertices++; 1635 } 1636 } 1637 /* determine if a QR strategy is needed for change of basis */ 1638 qr_needed = PETSC_FALSE; 1639 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1640 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1641 if (PetscBTLookup(change_basis,i)) { 1642 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1643 j = 0; 1644 for (k=0;k<size_of_constraint;k++) { 1645 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1646 j++; 1647 } 1648 } 1649 /* found more than one primal dof on the cc */ 1650 if (j > 1) { 1651 PetscBTSet(qr_needed_idx,i); 1652 qr_needed = PETSC_TRUE; 1653 } 1654 } 1655 } 1656 /* free workspace */ 1657 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1658 1659 /* permute indices in order to have a sorted set of vertices */ 1660 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1661 1662 /* nonzero structure of constraint matrix */ 1663 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1664 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1665 j=total_primal_vertices; 1666 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1667 if (!PetscBTLookup(change_basis,i)) { 1668 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1669 j++; 1670 } 1671 } 1672 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1673 ierr = PetscFree(nnz);CHKERRQ(ierr); 1674 /* set values in constraint matrix */ 1675 for (i=0;i<total_primal_vertices;i++) { 1676 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1677 } 1678 total_counts = total_primal_vertices; 1679 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1680 if (!PetscBTLookup(change_basis,i)) { 1681 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1682 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1683 total_counts++; 1684 } 1685 } 1686 /* assembling */ 1687 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 /* 1690 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1691 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1692 */ 1693 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1694 if (pcbddc->use_change_of_basis) { 1695 /* dual and primal dofs on a single cc */ 1696 PetscInt dual_dofs,primal_dofs; 1697 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1698 PetscInt primal_counter; 1699 /* working stuff for GEQRF */ 1700 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1701 PetscBLASInt lqr_work; 1702 /* working stuff for UNGQR */ 1703 PetscScalar *gqr_work,lgqr_work_t; 1704 PetscBLASInt lgqr_work; 1705 /* working stuff for TRTRS */ 1706 PetscScalar *trs_rhs; 1707 PetscBLASInt Blas_NRHS; 1708 /* pointers for values insertion into change of basis matrix */ 1709 PetscInt *start_rows,*start_cols; 1710 PetscScalar *start_vals; 1711 /* working stuff for values insertion */ 1712 PetscBT is_primal; 1713 1714 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1715 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1716 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1717 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1718 /* work arrays */ 1719 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1720 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1721 /* nonzeros per row */ 1722 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1723 if (PetscBTLookup(change_basis,i)) { 1724 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1725 if (PetscBTLookup(qr_needed_idx,i)) { 1726 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1727 } else { 1728 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1729 /* get local primal index on the cc */ 1730 j = 0; 1731 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1732 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1733 } 1734 } 1735 } 1736 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1737 ierr = PetscFree(nnz);CHKERRQ(ierr); 1738 /* Set initial identity in the matrix */ 1739 for (i=0;i<pcis->n_B;i++) { 1740 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1741 } 1742 1743 if (pcbddc->dbg_flag) { 1744 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1745 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1746 } 1747 1748 1749 /* Now we loop on the constraints which need a change of basis */ 1750 /* 1751 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1752 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1753 1754 Basic blocks of change of basis matrix T computed 1755 1756 - Using the following block transformation if there is only a primal dof on the cc 1757 (in the example, primal dof is the last one of the edge in LOCAL ordering 1758 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1759 | 1 0 ... 0 1 | 1760 | 0 1 ... 0 1 | 1761 | ... | 1762 | 0 ... 1 1 | 1763 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1764 1765 - via QR decomposition of constraints otherwise 1766 */ 1767 if (qr_needed) { 1768 /* space to store Q */ 1769 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1770 /* first we issue queries for optimal work */ 1771 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1772 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1773 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1774 lqr_work = -1; 1775 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1776 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1777 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1778 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1779 lgqr_work = -1; 1780 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1781 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1782 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1783 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1784 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1785 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1786 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1787 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1788 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1789 /* array to store scaling factors for reflectors */ 1790 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1791 /* array to store rhs and solution of triangular solver */ 1792 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1793 /* allocating workspace for check */ 1794 if (pcbddc->dbg_flag) { 1795 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1796 } 1797 } 1798 /* array to store whether a node is primal or not */ 1799 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1800 for (i=0;i<total_primal_vertices;i++) { 1801 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1802 } 1803 1804 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1805 /* -> using implicit ordering contained in temp_indices data */ 1806 total_counts = pcbddc->n_vertices; 1807 primal_counter = total_counts; 1808 while (total_counts<pcbddc->local_primal_size) { 1809 primal_dofs = 1; 1810 if (PetscBTLookup(change_basis,total_counts)) { 1811 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1812 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1813 primal_dofs++; 1814 } 1815 /* get constraint info */ 1816 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1817 dual_dofs = size_of_constraint-primal_dofs; 1818 1819 if (pcbddc->dbg_flag) { 1820 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1821 } 1822 1823 if (primal_dofs > 1) { /* QR */ 1824 1825 /* copy quadrature constraints for change of basis check */ 1826 if (pcbddc->dbg_flag) { 1827 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1828 } 1829 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1830 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1831 1832 /* compute QR decomposition of constraints */ 1833 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1834 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1835 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1836 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1837 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1838 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1839 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1840 1841 /* explictly compute R^-T */ 1842 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1843 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1844 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1845 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1846 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1847 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1848 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1849 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1850 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1851 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1852 1853 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1854 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1855 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1856 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1857 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1858 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1859 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1860 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1861 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1862 1863 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1864 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1865 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1866 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1867 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1868 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1869 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1870 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1871 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1872 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1873 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1874 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1875 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1876 1877 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1878 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1879 /* insert cols for primal dofs */ 1880 for (j=0;j<primal_dofs;j++) { 1881 start_vals = &qr_basis[j*size_of_constraint]; 1882 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1883 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1884 } 1885 /* insert cols for dual dofs */ 1886 for (j=0,k=0;j<dual_dofs;k++) { 1887 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1888 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1889 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1890 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1891 j++; 1892 } 1893 } 1894 1895 /* check change of basis */ 1896 if (pcbddc->dbg_flag) { 1897 PetscInt ii,jj; 1898 PetscBool valid_qr=PETSC_TRUE; 1899 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1901 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1902 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1903 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1904 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1905 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1906 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1907 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1908 for (jj=0;jj<size_of_constraint;jj++) { 1909 for (ii=0;ii<primal_dofs;ii++) { 1910 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1911 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1912 } 1913 } 1914 if (!valid_qr) { 1915 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1916 for (jj=0;jj<size_of_constraint;jj++) { 1917 for (ii=0;ii<primal_dofs;ii++) { 1918 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1919 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1920 } 1921 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1922 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1923 } 1924 } 1925 } 1926 } else { 1927 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1928 } 1929 } 1930 } else { /* simple transformation block */ 1931 PetscInt row,col; 1932 PetscScalar val; 1933 for (j=0;j<size_of_constraint;j++) { 1934 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1935 if (!PetscBTLookup(is_primal,row)) { 1936 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1937 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1938 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1939 } else { 1940 for (k=0;k<size_of_constraint;k++) { 1941 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1942 if (row != col) { 1943 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1944 } else { 1945 val = 1.0; 1946 } 1947 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1948 } 1949 } 1950 } 1951 } 1952 /* increment primal counter */ 1953 primal_counter += primal_dofs; 1954 } else { 1955 if (pcbddc->dbg_flag) { 1956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1957 } 1958 } 1959 /* increment constraint counter total_counts */ 1960 total_counts += primal_dofs; 1961 } 1962 1963 /* free workspace */ 1964 if (qr_needed) { 1965 if (pcbddc->dbg_flag) { 1966 ierr = PetscFree(work);CHKERRQ(ierr); 1967 } 1968 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1969 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1970 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1971 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1972 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1973 } 1974 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1975 /* assembling */ 1976 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1977 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1978 /* 1979 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1980 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1981 */ 1982 } 1983 1984 /* flush dbg viewer */ 1985 if (pcbddc->dbg_flag) { 1986 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1987 } 1988 1989 /* free workspace */ 1990 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1991 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 1992 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1993 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1994 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1995 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1996 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1997 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1998 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1999 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2005 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2006 { 2007 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2008 PC_IS *pcis = (PC_IS*)pc->data; 2009 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2010 PetscInt bs,ierr,i,vertex_size; 2011 PetscViewer viewer=pcbddc->dbg_viewer; 2012 2013 PetscFunctionBegin; 2014 /* Init local Graph struct */ 2015 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2016 2017 /* Check validity of the csr graph passed in by the user */ 2018 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2019 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2020 } 2021 2022 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2023 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2024 Mat mat_adj; 2025 const PetscInt *xadj,*adjncy; 2026 PetscBool flg_row=PETSC_TRUE; 2027 2028 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2029 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2030 if (!flg_row) { 2031 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2032 } 2033 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2034 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2035 if (!flg_row) { 2036 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2037 } 2038 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2039 } 2040 2041 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2042 vertex_size = 1; 2043 if (!pcbddc->user_provided_isfordofs) { 2044 if (!pcbddc->n_ISForDofs) { 2045 IS *custom_ISForDofs; 2046 2047 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2048 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2049 for (i=0;i<bs;i++) { 2050 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2051 } 2052 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2053 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2054 /* remove my references to IS objects */ 2055 for (i=0;i<bs;i++) { 2056 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2057 } 2058 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2059 } 2060 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2061 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2062 } 2063 2064 /* Setup of Graph */ 2065 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2066 2067 /* Graph's connected components analysis */ 2068 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2069 2070 /* print some info to stdout */ 2071 if (pcbddc->dbg_flag) { 2072 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2079 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2080 { 2081 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2082 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2083 PetscErrorCode ierr; 2084 2085 PetscFunctionBegin; 2086 n = 0; 2087 vertices = 0; 2088 if (pcbddc->ConstraintMatrix) { 2089 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2090 for (i=0;i<local_primal_size;i++) { 2091 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2092 if (size_of_constraint == 1) n++; 2093 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2094 } 2095 if (vertices_idx) { 2096 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2097 n = 0; 2098 for (i=0;i<local_primal_size;i++) { 2099 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2100 if (size_of_constraint == 1) { 2101 vertices[n++]=row_cmat_indices[0]; 2102 } 2103 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2104 } 2105 } 2106 } 2107 *n_vertices = n; 2108 if (vertices_idx) *vertices_idx = vertices; 2109 PetscFunctionReturn(0); 2110 } 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2114 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2115 { 2116 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2117 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2118 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2119 PetscBT touched; 2120 PetscErrorCode ierr; 2121 2122 /* This function assumes that the number of local constraints per connected component 2123 is not greater than the number of nodes defined for the connected component 2124 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2125 PetscFunctionBegin; 2126 n = 0; 2127 constraints_index = 0; 2128 if (pcbddc->ConstraintMatrix) { 2129 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2130 max_size_of_constraint = 0; 2131 for (i=0;i<local_primal_size;i++) { 2132 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2133 if (size_of_constraint > 1) { 2134 n++; 2135 } 2136 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2137 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2138 } 2139 if (constraints_idx) { 2140 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2141 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2142 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2143 n = 0; 2144 for (i=0;i<local_primal_size;i++) { 2145 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2146 if (size_of_constraint > 1) { 2147 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2148 /* find first untouched local node */ 2149 j = 0; 2150 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2151 min_index = row_cmat_global_indices[j]; 2152 min_loc = j; 2153 /* search the minimum among nodes not yet touched on the connected component 2154 since there can be more than one constraint on a single cc */ 2155 for (j=1;j<size_of_constraint;j++) { 2156 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2157 min_index = row_cmat_global_indices[j]; 2158 min_loc = j; 2159 } 2160 } 2161 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2162 constraints_index[n++] = row_cmat_indices[min_loc]; 2163 } 2164 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2165 } 2166 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2167 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2168 } 2169 } 2170 *n_constraints = n; 2171 if (constraints_idx) *constraints_idx = constraints_index; 2172 PetscFunctionReturn(0); 2173 } 2174 2175 /* the next two functions has been adapted from pcis.c */ 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "PCBDDCApplySchur" 2178 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2179 { 2180 PetscErrorCode ierr; 2181 PC_IS *pcis = (PC_IS*)(pc->data); 2182 2183 PetscFunctionBegin; 2184 if (!vec2_B) { vec2_B = v; } 2185 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2186 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2187 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2188 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2189 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2190 PetscFunctionReturn(0); 2191 } 2192 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2195 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2196 { 2197 PetscErrorCode ierr; 2198 PC_IS *pcis = (PC_IS*)(pc->data); 2199 2200 PetscFunctionBegin; 2201 if (!vec2_B) { vec2_B = v; } 2202 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2203 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2204 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2205 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2206 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2207 PetscFunctionReturn(0); 2208 } 2209 2210 #undef __FUNCT__ 2211 #define __FUNCT__ "PCBDDCSubsetNumbering" 2212 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2213 { 2214 Vec local_vec,global_vec; 2215 IS seqis,paris; 2216 VecScatter scatter_ctx; 2217 PetscScalar *array; 2218 PetscInt *temp_global_dofs; 2219 PetscScalar globalsum; 2220 PetscInt i,j,s; 2221 PetscInt nlocals,first_index,old_index,max_local; 2222 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2223 PetscMPIInt *dof_sizes,*dof_displs; 2224 PetscBool first_found; 2225 PetscErrorCode ierr; 2226 2227 PetscFunctionBegin; 2228 /* mpi buffers */ 2229 MPI_Comm_size(comm,&size_prec_comm); 2230 MPI_Comm_rank(comm,&rank_prec_comm); 2231 j = ( !rank_prec_comm ? size_prec_comm : 0); 2232 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2233 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2234 /* get maximum size of subset */ 2235 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2236 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2237 max_local = 0; 2238 if (n_local_dofs) { 2239 max_local = temp_global_dofs[0]; 2240 for (i=1;i<n_local_dofs;i++) { 2241 if (max_local < temp_global_dofs[i] ) { 2242 max_local = temp_global_dofs[i]; 2243 } 2244 } 2245 } 2246 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2247 max_global++; 2248 max_local = 0; 2249 if (n_local_dofs) { 2250 max_local = local_dofs[0]; 2251 for (i=1;i<n_local_dofs;i++) { 2252 if (max_local < local_dofs[i] ) { 2253 max_local = local_dofs[i]; 2254 } 2255 } 2256 } 2257 max_local++; 2258 /* allocate workspace */ 2259 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2260 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2261 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2262 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2263 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2264 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2265 /* create scatter */ 2266 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2267 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2268 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2269 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2270 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2271 /* init array */ 2272 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2273 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2274 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2275 if (local_dofs_mult) { 2276 for (i=0;i<n_local_dofs;i++) { 2277 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2278 } 2279 } else { 2280 for (i=0;i<n_local_dofs;i++) { 2281 array[local_dofs[i]]=1.0; 2282 } 2283 } 2284 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2285 /* scatter into global vec and get total number of global dofs */ 2286 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2287 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2288 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2289 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2290 /* Fill global_vec with cumulative function for global numbering */ 2291 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2292 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2293 nlocals = 0; 2294 first_index = -1; 2295 first_found = PETSC_FALSE; 2296 for (i=0;i<s;i++) { 2297 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2298 first_found = PETSC_TRUE; 2299 first_index = i; 2300 } 2301 nlocals += (PetscInt)PetscRealPart(array[i]); 2302 } 2303 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2304 if (!rank_prec_comm) { 2305 dof_displs[0]=0; 2306 for (i=1;i<size_prec_comm;i++) { 2307 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2308 } 2309 } 2310 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2311 if (first_found) { 2312 array[first_index] += (PetscScalar)nlocals; 2313 old_index = first_index; 2314 for (i=first_index+1;i<s;i++) { 2315 if (PetscRealPart(array[i]) > 0.0) { 2316 array[i] += array[old_index]; 2317 old_index = i; 2318 } 2319 } 2320 } 2321 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2322 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2323 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2324 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2325 /* get global ordering of local dofs */ 2326 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2327 if (local_dofs_mult) { 2328 for (i=0;i<n_local_dofs;i++) { 2329 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2330 } 2331 } else { 2332 for (i=0;i<n_local_dofs;i++) { 2333 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2334 } 2335 } 2336 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2337 /* free workspace */ 2338 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2339 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2340 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2341 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2342 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2343 /* return pointer to global ordering of local dofs */ 2344 *global_numbering_subset = temp_global_dofs; 2345 PetscFunctionReturn(0); 2346 } 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2350 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2351 { 2352 PetscInt i,j; 2353 PetscScalar *alphas; 2354 PetscErrorCode ierr; 2355 2356 PetscFunctionBegin; 2357 /* this implements stabilized Gram-Schmidt */ 2358 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2359 for (i=0;i<n;i++) { 2360 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2361 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2362 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2363 } 2364 ierr = PetscFree(alphas);CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatConvert_IS_AIJ" 2371 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2372 { 2373 Mat new_mat; 2374 Mat_IS *matis = (Mat_IS*)(mat->data); 2375 /* info on mat */ 2376 PetscInt bs,rows,cols; 2377 PetscInt lrows,lcols; 2378 PetscInt local_rows,local_cols; 2379 PetscMPIInt nsubdomains; 2380 /* preallocation */ 2381 Vec vec_dnz,vec_onz; 2382 PetscScalar *my_dnz,*my_onz,*array; 2383 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2384 PetscInt index_row,index_col,owner; 2385 PetscInt *local_indices,*global_indices; 2386 /* work */ 2387 PetscInt i,j; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 /* CHECKS */ 2392 /* get info from mat */ 2393 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2394 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2395 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2396 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2397 2398 /* MAT_INITIAL_MATRIX */ 2399 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2400 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2401 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2402 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2403 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2404 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2405 2406 /* 2407 preallocation 2408 */ 2409 2410 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2411 /* 2412 Some vectors are needed to sum up properly on shared interface dofs. 2413 Note that preallocation is not exact, since it overestimates nonzeros 2414 */ 2415 /* 2416 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2417 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2418 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2419 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2420 */ 2421 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2422 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2423 /* All processes need to compute entire row ownership */ 2424 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2425 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2426 for (i=0;i<nsubdomains;i++) { 2427 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2428 row_ownership[j]=i; 2429 } 2430 } 2431 /* map indices of local mat to global values */ 2432 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2433 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2434 for (i=0;i<local_rows;i++) local_indices[i]=i; 2435 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2436 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2437 2438 /* 2439 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2440 then, they will be summed up properly. This way, preallocation is always sufficient 2441 */ 2442 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2443 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2444 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2445 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2446 for (i=0;i<local_rows;i++) { 2447 index_row = global_indices[i]; 2448 for (j=i;j<local_rows;j++) { 2449 owner = row_ownership[index_row]; 2450 index_col = global_indices[j]; 2451 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2452 my_dnz[i] += 1.0; 2453 } else { /* offdiag block */ 2454 my_onz[i] += 1.0; 2455 } 2456 /* same as before, interchanging rows and cols */ 2457 if (i != j) { 2458 owner = row_ownership[index_col]; 2459 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2460 my_dnz[j] += 1.0; 2461 } else { 2462 my_onz[j] += 1.0; 2463 } 2464 } 2465 } 2466 } 2467 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2468 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2469 if (local_rows) { /* multilevel guard */ 2470 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2471 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2472 } 2473 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2474 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2475 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2476 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2477 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2478 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2479 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2480 2481 /* set computed preallocation in dnz and onz */ 2482 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2483 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2484 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2485 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2486 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2487 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2488 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2489 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2490 2491 /* Resize preallocation if overestimated */ 2492 for (i=0;i<lrows;i++) { 2493 dnz[i] = PetscMin(dnz[i],lcols); 2494 onz[i] = PetscMin(onz[i],cols-lcols); 2495 } 2496 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2497 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2498 2499 /* 2500 Set values. Very Basic. 2501 */ 2502 for (i=0;i<local_rows;i++) { 2503 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2504 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2505 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2506 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2507 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2508 } 2509 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2510 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2511 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2512 2513 /* get back mat */ 2514 *M = new_mat; 2515 PetscFunctionReturn(0); 2516 } 2517 2518 #undef __FUNCT__ 2519 #define __FUNCT__ "MatISSubassemble_Private" 2520 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2521 { 2522 Mat subdomain_adj; 2523 IS new_ranks,ranks_send_to; 2524 MatPartitioning partitioner; 2525 Mat_IS *matis; 2526 PetscInt n_neighs,*neighs,*n_shared,**shared; 2527 PetscInt prank; 2528 PetscMPIInt size,rank,color; 2529 PetscInt *xadj,*adjncy,*oldranks; 2530 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2531 PetscInt i,j,n_subdomains,local_size,threshold=0; 2532 PetscErrorCode ierr; 2533 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2534 PetscSubcomm subcomm; 2535 2536 PetscFunctionBegin; 2537 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2538 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2539 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2540 2541 /* Get info on mapping */ 2542 matis = (Mat_IS*)(mat->data); 2543 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2544 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2545 2546 /* build local CSR graph of subdomains' connectivity */ 2547 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2548 xadj[0] = 0; 2549 xadj[1] = PetscMax(n_neighs-1,0); 2550 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2551 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2552 2553 if (threshold) { 2554 PetscInt* count,min_threshold; 2555 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2556 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2557 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2558 for (j=0;j<n_shared[i];j++) { 2559 count[shared[i][j]] += 1; 2560 } 2561 } 2562 /* adapt threshold since we dont want to lose significant connections */ 2563 min_threshold = n_neighs; 2564 for (i=1;i<n_neighs;i++) { 2565 for (j=0;j<n_shared[i];j++) { 2566 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2567 } 2568 } 2569 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2570 threshold = PetscMax(min_threshold+1,threshold); 2571 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2572 xadj[1] = 0; 2573 for (i=1;i<n_neighs;i++) { 2574 for (j=0;j<n_shared[i];j++) { 2575 if (count[shared[i][j]] < threshold) { 2576 adjncy[xadj[1]] = neighs[i]; 2577 adjncy_wgt[xadj[1]] = n_shared[i]; 2578 xadj[1]++; 2579 break; 2580 } 2581 } 2582 } 2583 ierr = PetscFree(count);CHKERRQ(ierr); 2584 } else { 2585 if (xadj[1]) { 2586 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2587 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2588 } 2589 } 2590 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2591 if (use_square) { 2592 for (i=0;i<xadj[1];i++) { 2593 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2594 } 2595 } 2596 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2597 2598 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2599 2600 /* 2601 Restrict work on active processes only. 2602 */ 2603 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2604 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2605 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2606 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2607 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2608 if (color) { 2609 ierr = PetscFree(xadj);CHKERRQ(ierr); 2610 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2611 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2612 } else { 2613 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2614 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2615 prank = rank; 2616 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2617 for (i=0;i<size;i++) { 2618 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2619 } 2620 for (i=0;i<xadj[1];i++) { 2621 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2622 } 2623 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2624 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2625 n_subdomains = (PetscInt)size; 2626 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2627 2628 /* Partition */ 2629 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2630 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2631 if (use_vwgt) { 2632 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2633 v_wgt[0] = local_size; 2634 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2635 } 2636 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2637 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2638 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2639 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2640 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2641 2642 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2643 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2644 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2645 /* clean up */ 2646 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2647 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2648 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2649 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2650 } 2651 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2652 2653 /* assemble parallel IS for sends */ 2654 i = 1; 2655 if (color) i=0; 2656 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2657 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2658 2659 /* get back IS */ 2660 *is_sends = ranks_send_to; 2661 PetscFunctionReturn(0); 2662 } 2663 2664 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2665 2666 #undef __FUNCT__ 2667 #define __FUNCT__ "MatISSubassemble" 2668 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2669 { 2670 Mat local_mat,new_mat; 2671 Mat_IS *matis; 2672 IS is_sends_internal; 2673 PetscInt rows,cols; 2674 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2675 PetscBool ismatis,isdense; 2676 ISLocalToGlobalMapping l2gmap; 2677 PetscInt* l2gmap_indices; 2678 const PetscInt* is_indices; 2679 MatType new_local_type; 2680 MatTypePrivate new_local_type_private; 2681 /* buffers */ 2682 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2683 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2684 /* MPI */ 2685 MPI_Comm comm; 2686 PetscMPIInt n_sends,n_recvs,commsize; 2687 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2688 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2689 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2690 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2691 PetscErrorCode ierr; 2692 2693 PetscFunctionBegin; 2694 /* checks */ 2695 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2696 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2697 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2698 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2699 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2700 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2701 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2702 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2703 PetscValidLogicalCollectiveInt(mat,bs,0); 2704 /* prepare IS for sending if not provided */ 2705 if (!is_sends) { 2706 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2707 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2708 } else { 2709 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2710 is_sends_internal = is_sends; 2711 } 2712 2713 /* get pointer of MATIS data */ 2714 matis = (Mat_IS*)mat->data; 2715 2716 /* get comm */ 2717 comm = PetscObjectComm((PetscObject)mat); 2718 2719 /* compute number of sends */ 2720 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2721 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2722 2723 /* compute number of receives */ 2724 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2725 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2726 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2727 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2728 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2729 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2730 ierr = PetscFree(iflags);CHKERRQ(ierr); 2731 2732 /* prepare send/receive buffers */ 2733 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2734 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2735 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2736 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2737 2738 /* Get data from local mat */ 2739 if (!isdense) { 2740 /* TODO: See below some guidelines on how to prepare the local buffers */ 2741 /* 2742 send_buffer_vals should contain the raw values of the local matrix 2743 send_buffer_idxs should contain: 2744 - MatType_PRIVATE type 2745 - PetscInt size_of_l2gmap 2746 - PetscInt global_row_indices[size_of_l2gmap] 2747 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2748 */ 2749 } else { 2750 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2751 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2752 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2753 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2754 send_buffer_idxs[1] = i; 2755 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2756 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2757 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2758 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2759 for (i=0;i<n_sends;i++) { 2760 ilengths_vals[is_indices[i]] = len*len; 2761 ilengths_idxs[is_indices[i]] = len+2; 2762 } 2763 } 2764 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2765 buf_size_idxs = 0; 2766 buf_size_vals = 0; 2767 for (i=0;i<n_recvs;i++) { 2768 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2769 buf_size_vals += (PetscInt)olengths_vals[i]; 2770 } 2771 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2772 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2773 2774 /* get new tags for clean communications */ 2775 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2776 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2777 2778 /* allocate for requests */ 2779 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2780 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2781 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2782 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2783 2784 /* communications */ 2785 ptr_idxs = recv_buffer_idxs; 2786 ptr_vals = recv_buffer_vals; 2787 for (i=0;i<n_recvs;i++) { 2788 source_dest = onodes[i]; 2789 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2790 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2791 ptr_idxs += olengths_idxs[i]; 2792 ptr_vals += olengths_vals[i]; 2793 } 2794 for (i=0;i<n_sends;i++) { 2795 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2796 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2797 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2798 } 2799 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2800 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2801 2802 /* assemble new l2g map */ 2803 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2804 ptr_idxs = recv_buffer_idxs; 2805 buf_size_idxs = 0; 2806 for (i=0;i<n_recvs;i++) { 2807 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2808 ptr_idxs += olengths_idxs[i]; 2809 } 2810 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2811 ptr_idxs = recv_buffer_idxs; 2812 buf_size_idxs = 0; 2813 for (i=0;i<n_recvs;i++) { 2814 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2815 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2816 ptr_idxs += olengths_idxs[i]; 2817 } 2818 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2819 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2820 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2821 2822 /* infer new local matrix type from received local matrices type */ 2823 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2824 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2825 new_local_type_private = MATAIJ_PRIVATE; 2826 new_local_type = MATSEQAIJ; 2827 if (n_recvs) { 2828 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2829 ptr_idxs = recv_buffer_idxs; 2830 for (i=0;i<n_recvs;i++) { 2831 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2832 new_local_type_private = MATAIJ_PRIVATE; 2833 break; 2834 } 2835 ptr_idxs += olengths_idxs[i]; 2836 } 2837 switch (new_local_type_private) { 2838 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2839 new_local_type = MATSEQAIJ; 2840 bs = 1; 2841 break; 2842 case MATAIJ_PRIVATE: 2843 new_local_type = MATSEQAIJ; 2844 bs = 1; 2845 break; 2846 case MATBAIJ_PRIVATE: 2847 new_local_type = MATSEQBAIJ; 2848 break; 2849 case MATSBAIJ_PRIVATE: 2850 new_local_type = MATSEQSBAIJ; 2851 break; 2852 default: 2853 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2854 break; 2855 } 2856 } 2857 2858 /* create MATIS object */ 2859 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2860 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2861 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2862 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2863 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2864 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2865 2866 /* set values */ 2867 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2868 ptr_vals = recv_buffer_vals; 2869 ptr_idxs = recv_buffer_idxs; 2870 for (i=0;i<n_recvs;i++) { 2871 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2872 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2873 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2874 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2875 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2876 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2877 } 2878 ptr_idxs += olengths_idxs[i]; 2879 ptr_vals += olengths_vals[i]; 2880 } 2881 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2882 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2883 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2884 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2885 2886 { /* check */ 2887 Vec lvec,rvec; 2888 PetscReal infty_error; 2889 2890 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2891 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2892 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2893 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2894 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2895 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2896 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2897 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2898 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2899 } 2900 2901 /* free workspace */ 2902 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2903 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2904 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2905 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2906 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2907 if (isdense) { 2908 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2909 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2910 } else { 2911 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2912 } 2913 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2914 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2915 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2916 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2917 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2918 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2919 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2920 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2921 ierr = PetscFree(onodes);CHKERRQ(ierr); 2922 /* get back new mat */ 2923 *mat_n = new_mat; 2924 PetscFunctionReturn(0); 2925 } 2926 2927 #undef __FUNCT__ 2928 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2929 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2930 { 2931 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2932 PC_IS *pcis = (PC_IS*)pc->data; 2933 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2934 MatNullSpace CoarseNullSpace=NULL; 2935 ISLocalToGlobalMapping coarse_islg; 2936 IS coarse_is; 2937 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2938 PetscInt im_active=-1,active_procs=-1; 2939 PC pc_temp; 2940 PCType coarse_pc_type; 2941 KSPType coarse_ksp_type; 2942 PetscBool multilevel_requested,multilevel_allowed; 2943 PetscBool setsym,issym,isbddc,isnn; 2944 MatStructure matstruct; 2945 PetscErrorCode ierr; 2946 2947 PetscFunctionBegin; 2948 /* Assign global numbering to coarse dofs */ 2949 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2950 2951 /* infer some info from user */ 2952 issym = PETSC_FALSE; 2953 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2954 multilevel_allowed = PETSC_FALSE; 2955 multilevel_requested = PETSC_FALSE; 2956 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2957 if (multilevel_requested) { 2958 /* count "active processes" */ 2959 im_active = !!(pcis->n); 2960 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2961 if (active_procs/pcbddc->coarsening_ratio < 2) { 2962 multilevel_allowed = PETSC_FALSE; 2963 } else { 2964 multilevel_allowed = PETSC_TRUE; 2965 } 2966 } 2967 2968 /* set defaults for coarse KSP and PC */ 2969 if (multilevel_allowed) { 2970 if (issym) { 2971 coarse_ksp_type = KSPRICHARDSON; 2972 } else { 2973 coarse_ksp_type = KSPCHEBYSHEV; 2974 } 2975 coarse_pc_type = PCBDDC; 2976 } else { 2977 coarse_ksp_type = KSPPREONLY; 2978 coarse_pc_type = PCREDUNDANT; 2979 } 2980 2981 /* create the coarse KSP object only once with defaults */ 2982 if (!pcbddc->coarse_ksp) { 2983 char prefix[256],str_level[3]; 2984 size_t len; 2985 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2986 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2987 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2988 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2989 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2990 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2991 /* prefix */ 2992 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 2993 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2994 if (!pcbddc->current_level) { 2995 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2996 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 2997 } else { 2998 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2999 if (pcbddc->current_level>1) len -= 2; 3000 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3001 *(prefix+len)='\0'; 3002 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3003 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3004 } 3005 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3006 } 3007 /* allow user customization */ 3008 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3009 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3010 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3011 3012 /* get some info after set from options */ 3013 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3014 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3015 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3016 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3017 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3018 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3019 isbddc = PETSC_FALSE; 3020 } 3021 3022 /* propagate BDDC info to the next level */ 3023 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3024 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3025 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3026 3027 /* creates temporary MATIS object for coarse matrix */ 3028 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3029 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3030 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3031 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3032 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3033 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3034 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3035 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3036 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3037 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3038 3039 /* assemble coarse matrix */ 3040 if (isbddc || isnn) { 3041 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3042 } else { 3043 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3044 } 3045 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3046 3047 /* create local to global scatters for coarse problem */ 3048 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3049 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3050 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3051 3052 /* propagate symmetry info to coarse matrix */ 3053 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3054 3055 /* Compute coarse null space (special handling by BDDC only) */ 3056 if (pcbddc->NullSpace) { 3057 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3058 if (isbddc) { 3059 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3060 } else { 3061 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3062 } 3063 } 3064 3065 /* set operators */ 3066 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3067 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3068 3069 /* additional KSP customization */ 3070 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3071 if (max_it < 5) { 3072 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3073 } 3074 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3075 3076 3077 /* print some info if requested */ 3078 if (pcbddc->dbg_flag) { 3079 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3080 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3081 if (!multilevel_allowed) { 3082 if (multilevel_requested) { 3083 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3084 } else if (pcbddc->max_levels) { 3085 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3086 } 3087 } 3088 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3089 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3090 } 3091 3092 /* setup coarse ksp */ 3093 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3094 if (pcbddc->dbg_flag) { 3095 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3096 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3097 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3099 } 3100 3101 /* Check coarse problem if requested */ 3102 if (pcbddc->dbg_flag) { 3103 KSP check_ksp; 3104 KSPType check_ksp_type; 3105 PC check_pc; 3106 Vec check_vec; 3107 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3108 PetscInt its; 3109 PetscBool ispreonly,compute; 3110 3111 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3112 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3113 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3114 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3115 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3116 if (ispreonly) { 3117 check_ksp_type = KSPPREONLY; 3118 compute = PETSC_FALSE; 3119 } else { 3120 if (issym) check_ksp_type = KSPCG; 3121 else check_ksp_type = KSPGMRES; 3122 compute = PETSC_TRUE; 3123 } 3124 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3125 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3126 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3127 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3128 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3129 /* create random vec */ 3130 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3131 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3132 if (CoarseNullSpace) { 3133 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3134 } 3135 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3136 /* solve coarse problem */ 3137 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3138 if (CoarseNullSpace) { 3139 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3140 } 3141 /* check coarse problem residual error */ 3142 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3143 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3144 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3145 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3146 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3147 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3148 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3150 /* get eigenvalue estimation if preonly has not been requested */ 3151 if (!ispreonly) { 3152 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3153 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3155 } 3156 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3157 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3158 } 3159 /* free memory */ 3160 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3161 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3162 PetscFunctionReturn(0); 3163 } 3164 3165 #undef __FUNCT__ 3166 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3167 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3168 { 3169 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3170 PC_IS* pcis = (PC_IS*)pc->data; 3171 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3172 PetscInt i,j,coarse_size; 3173 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3174 PetscErrorCode ierr; 3175 3176 PetscFunctionBegin; 3177 /* get indices in local ordering for vertices and constraints */ 3178 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3179 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3180 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3181 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3182 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3183 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3184 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3185 3186 /* Compute global number of coarse dofs */ 3187 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3188 3189 /* check numbering */ 3190 if (pcbddc->dbg_flag) { 3191 PetscScalar coarsesum,*array; 3192 3193 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3194 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3195 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3196 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3197 for (i=0;i<pcbddc->local_primal_size;i++) { 3198 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3199 } 3200 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3201 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3202 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3203 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3204 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3205 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3206 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3207 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3208 for (i=0;i<pcis->n;i++) { 3209 if (array[i] == 1.0) { 3210 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3211 } 3212 } 3213 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3214 for (i=0;i<pcis->n;i++) { 3215 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3216 } 3217 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3218 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3219 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3220 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3221 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3222 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3223 if (pcbddc->dbg_flag > 1) { 3224 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3225 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3226 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3227 for (i=0;i<pcbddc->local_primal_size;i++) { 3228 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3229 } 3230 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3231 } 3232 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3233 } 3234 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3235 /* get back data */ 3236 *coarse_size_n = coarse_size; 3237 *local_primal_indices_n = local_primal_indices; 3238 PetscFunctionReturn(0); 3239 } 3240 3241