1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 pcbddc->n_ISForDofs = 0; 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "PCBDDCCreateWorkVectors" 111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 112 { 113 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 114 PC_IS *pcis = (PC_IS*)pc->data; 115 VecType impVecType; 116 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 117 PetscErrorCode ierr; 118 119 PetscFunctionBegin; 120 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 121 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 122 local_primal_size = n_constraints+n_vertices; 123 n_R = pcis->n-n_vertices; 124 /* local work vectors */ 125 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 126 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 127 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 128 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 129 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 130 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 131 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 132 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 133 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 134 if (n_constraints) { 135 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 136 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 137 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetUpCorrection" 144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 145 { 146 PetscErrorCode ierr; 147 /* pointers to pcis and pcbddc */ 148 PC_IS* pcis = (PC_IS*)pc->data; 149 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 150 /* submatrices of local problem */ 151 Mat A_RV,A_VR,A_VV; 152 /* working matrices */ 153 Mat M1,M2,M3,C_CR; 154 /* working vectors */ 155 Vec vec1_C,vec2_C,vec1_V,vec2_V; 156 /* additional working stuff */ 157 IS is_aux; 158 ISLocalToGlobalMapping BtoNmap; 159 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 160 const PetscScalar *array,*row_cmat_values; 161 const PetscInt *row_cmat_indices,*idx_R_local; 162 PetscInt *vertices,*idx_V_B,*auxindices; 163 PetscInt n_vertices,n_constraints,size_of_constraint; 164 PetscInt i,j,n_R,n_D,n_B; 165 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 166 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 184 /* Allocating some extra storage just to be safe */ 185 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 186 for (i=0;i<pcis->n;i++) auxindices[i]=i; 187 188 /* vertices in boundary numbering */ 189 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 190 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 191 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 193 if (i != n_vertices) { 194 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 195 } 196 197 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 198 if (n_constraints) { 199 /* work vectors */ 200 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 201 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 202 /* auxiliary matrices */ 203 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 204 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 205 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 206 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 207 208 /* Extract constraints on R nodes: C_{CR} */ 209 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 210 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 211 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 212 213 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 214 for (i=0;i<n_constraints;i++) { 215 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 216 /* Get row of constraint matrix in R numbering */ 217 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 218 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 219 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 220 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 221 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 222 /* Solve for row of constraint matrix in R numbering */ 223 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 224 /* Set values in local_auxmat2 */ 225 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 226 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 227 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 228 } 229 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 230 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 231 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 232 233 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 234 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 235 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 236 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 237 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 238 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 239 ierr = MatSetUp(M1);CHKERRQ(ierr); 240 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 241 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 242 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 243 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 244 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 245 ierr = MatDestroy(&M2);CHKERRQ(ierr); 246 ierr = MatDestroy(&M3);CHKERRQ(ierr); 247 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 248 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 249 } 250 251 /* Get submatrices from subdomain matrix */ 252 if (n_vertices) { 253 PetscInt ibs,mbs; 254 PetscBool issbaij; 255 Mat newmat; 256 257 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 258 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 259 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 260 if (ibs != mbs) { /* need to convert to SEQAIJ */ 261 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 262 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 263 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 265 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 266 } else { 267 /* this is safe */ 268 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 269 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 270 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 271 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 272 /* which of the two approaches is faster? */ 273 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 274 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 275 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 276 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 277 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 278 } else { 279 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 280 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 281 } 282 } 283 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 284 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 285 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 286 } 287 288 /* Matrix of coarse basis functions (local) */ 289 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 290 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 291 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 292 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 293 if (pcbddc->switch_static || pcbddc->dbg_flag) { 294 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 295 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 296 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 297 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 298 } 299 300 if (pcbddc->dbg_flag) { 301 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 302 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 303 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 304 } 305 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 306 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 307 308 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 309 310 /* vertices */ 311 for (i=0;i<n_vertices;i++) { 312 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 313 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 314 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 315 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 316 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 317 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 318 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 319 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 320 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 321 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 322 if (n_constraints) { 323 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 324 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 325 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 326 } 327 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 328 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 329 330 /* Set values in coarse basis function and subdomain part of coarse_mat */ 331 /* coarse basis functions */ 332 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 333 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 334 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 335 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 336 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 337 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 338 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 339 if (pcbddc->switch_static || pcbddc->dbg_flag) { 340 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 341 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 342 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 343 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 344 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 345 } 346 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 347 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 348 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 if (n_constraints) { 351 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 352 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 } 355 356 /* check */ 357 if (pcbddc->dbg_flag) { 358 /* assemble subdomain vector on local nodes */ 359 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 360 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 361 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 362 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 363 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 364 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 365 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 366 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 367 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 368 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 369 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 370 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 371 if (n_constraints) { 372 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 373 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 374 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 375 } 376 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 378 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 379 /* check saddle point solution */ 380 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 381 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 382 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 383 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 384 /* shift by the identity matrix */ 385 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 386 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 387 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 388 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 389 } 390 } 391 392 /* constraints */ 393 for (i=0;i<n_constraints;i++) { 394 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 395 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 396 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 397 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 398 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 399 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 400 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 401 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 402 if (n_vertices) { 403 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 404 } 405 /* Set values in coarse basis function and subdomain part of coarse_mat */ 406 /* coarse basis functions */ 407 j = i+n_vertices; /* don't touch this! */ 408 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 409 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 410 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 411 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 412 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 414 if (pcbddc->switch_static || pcbddc->dbg_flag) { 415 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 416 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 417 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 418 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 420 } 421 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 422 if (n_vertices) { 423 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 424 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 426 } 427 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 428 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 430 431 if (pcbddc->dbg_flag) { 432 /* assemble subdomain vector on nodes */ 433 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 434 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 437 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 439 /* assemble subdomain vector of lagrange multipliers */ 440 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 441 if (n_vertices) { 442 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 443 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 444 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 445 } 446 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 447 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 448 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 449 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 452 /* check saddle point solution */ 453 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 454 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 456 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 457 /* shift by the identity matrix */ 458 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 459 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 460 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 461 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 462 } 463 } 464 /* call assembling routines for local coarse basis */ 465 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 if (pcbddc->switch_static || pcbddc->dbg_flag) { 468 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 469 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 470 } 471 472 /* compute other basis functions for non-symmetric problems */ 473 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 474 if (!setsym || (setsym && !issym)) { 475 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 476 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 477 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 478 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 481 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 482 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 483 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 484 } 485 for (i=0;i<pcbddc->local_primal_size;i++) { 486 if (n_constraints) { 487 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 488 for (j=0;j<n_constraints;j++) { 489 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 490 } 491 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 492 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 493 } 494 if (i<n_vertices) { 495 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 496 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 499 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 500 if (n_constraints) { 501 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 502 } 503 } else { 504 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 505 } 506 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 507 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 508 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 509 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 510 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 511 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 512 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 513 if (i<n_vertices) { 514 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 515 } 516 if (pcbddc->switch_static || pcbddc->dbg_flag) { 517 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 518 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 519 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 520 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 521 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 522 } 523 524 if (pcbddc->dbg_flag) { 525 /* assemble subdomain vector on nodes */ 526 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 527 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 528 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 529 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 530 if (i<n_vertices) { 531 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 532 } 533 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 534 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 535 /* assemble subdomain vector of lagrange multipliers */ 536 for (j=0;j<pcbddc->local_primal_size;j++) { 537 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 538 } 539 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 540 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 541 /* check saddle point solution */ 542 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 543 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 544 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 545 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 546 /* shift by the identity matrix */ 547 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 548 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 549 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 550 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 551 } 552 } 553 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 if (pcbddc->switch_static || pcbddc->dbg_flag) { 556 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 } 559 unsymmetric_check = PETSC_TRUE; 560 } else { /* take references to already computed coarse basis */ 561 unsymmetric_check = PETSC_FALSE; 562 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 563 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 564 if (pcbddc->coarse_phi_D) { 565 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 566 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 567 } 568 } 569 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 570 /* Checking coarse_sub_mat and coarse basis functios */ 571 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 572 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 573 if (pcbddc->dbg_flag) { 574 Mat coarse_sub_mat; 575 Mat AUXMAT,TM1,TM2,TM3,TM4; 576 Mat coarse_phi_D,coarse_phi_B; 577 Mat coarse_psi_D,coarse_psi_B; 578 Mat A_II,A_BB,A_IB,A_BI; 579 MatType checkmattype=MATSEQAIJ; 580 PetscReal real_value; 581 582 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 583 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 584 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 585 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 586 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 587 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 588 if (unsymmetric_check) { 589 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 590 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 591 } 592 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 593 594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 596 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 597 if (unsymmetric_check) { 598 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 599 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 600 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 601 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 602 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 603 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 604 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 605 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 607 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 608 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 609 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 610 } else { 611 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 612 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 613 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 614 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 615 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 616 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 617 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 618 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 619 } 620 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 621 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 622 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 623 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 624 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 625 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 630 for (i=0;i<pcbddc->local_primal_size;i++) { 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 632 } 633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 634 for (i=0;i<pcbddc->local_primal_size;i++) { 635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 636 } 637 if (unsymmetric_check) { 638 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 639 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 641 } 642 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 643 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 644 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 645 } 646 } 647 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 648 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 649 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 650 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 651 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 652 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 653 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 654 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 655 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 656 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 657 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 658 if (unsymmetric_check) { 659 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 660 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 661 } 662 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 663 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 664 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 665 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 666 } 667 /* free memory */ 668 if (n_vertices) { 669 ierr = PetscFree(vertices);CHKERRQ(ierr); 670 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 671 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 672 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 673 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 674 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 675 } 676 if (n_constraints) { 677 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 678 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 679 ierr = MatDestroy(&M1);CHKERRQ(ierr); 680 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 681 } 682 ierr = PetscFree(auxindices);CHKERRQ(ierr); 683 /* get back data */ 684 *coarse_submat_vals_n = coarse_submat_vals; 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 690 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 691 { 692 PC_IS* pcis = (PC_IS*)(pc->data); 693 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 694 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 695 PetscBool issbaij,isbaij; 696 /* manage repeated solves */ 697 MatReuse reuse; 698 MatStructure matstruct; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 /* get mat flags */ 703 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 704 reuse = MAT_INITIAL_MATRIX; 705 if (pc->setupcalled) { 706 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 707 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 708 if (matstruct == SAME_NONZERO_PATTERN) { 709 reuse = MAT_REUSE_MATRIX; 710 } else { 711 reuse = MAT_INITIAL_MATRIX; 712 } 713 } 714 if (reuse == MAT_INITIAL_MATRIX) { 715 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 717 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 720 } 721 722 /* transform local matrices if needed */ 723 if (pcbddc->use_change_of_basis) { 724 Mat change_mat_all; 725 PetscScalar *row_cmat_values; 726 PetscInt *row_cmat_indices; 727 PetscInt *nnz,*is_indices,*temp_indices; 728 PetscInt i,j,k,n_D,n_B; 729 730 /* Get Non-overlapping dimensions */ 731 n_B = pcis->n_B; 732 n_D = pcis->n-n_B; 733 734 /* compute nonzero structure of change of basis on all local nodes */ 735 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 736 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 737 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 738 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 739 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 740 k=1; 741 for (i=0;i<n_B;i++) { 742 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 743 nnz[is_indices[i]]=j; 744 if (k < j) k = j; 745 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 746 } 747 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 748 /* assemble change of basis matrix on the whole set of local dofs */ 749 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 750 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 751 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 752 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 753 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 754 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 755 for (i=0;i<n_D;i++) { 756 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 757 } 758 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 759 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 760 for (i=0;i<n_B;i++) { 761 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 762 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 763 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 764 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 765 } 766 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 768 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 769 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 770 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 771 if (!issbaij && !isbaij) { 772 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 773 } else { 774 Mat work_mat; 775 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 776 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 777 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 778 } 779 /* 780 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 781 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 782 */ 783 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 784 ierr = PetscFree(nnz);CHKERRQ(ierr); 785 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 786 } else { 787 /* without change of basis, the local matrix is unchanged */ 788 if (!pcbddc->local_mat) { 789 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 790 pcbddc->local_mat = matis->A; 791 } 792 } 793 794 /* get submatrices */ 795 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 796 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 797 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 798 if (!issbaij) { 799 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 800 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 801 } else { 802 Mat newmat; 803 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 804 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 805 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 806 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 807 } 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 813 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 814 { 815 PC_IS* pcis = (PC_IS*)(pc->data); 816 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 817 IS is_aux1,is_aux2; 818 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 819 PetscInt n_vertices,i,j,n_R,n_D,n_B; 820 PetscInt vbs,bs; 821 PetscBT bitmask; 822 PetscErrorCode ierr; 823 824 PetscFunctionBegin; 825 /* Set Non-overlapping dimensions */ 826 n_B = pcis->n_B; n_D = pcis->n - n_B; 827 /* get vertex indices from constraint matrix */ 828 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 829 830 /* create auxiliary bitmask */ 831 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 832 for (i=0;i<n_vertices;i++) { 833 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 834 } 835 836 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 837 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 838 for (i=0, n_R=0; i<pcis->n; i++) { 839 if (!PetscBTLookup(bitmask,i)) { 840 idx_R_local[n_R] = i; 841 n_R++; 842 } 843 } 844 845 /* Block code */ 846 vbs = 1; 847 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 848 if (bs>1 && !(n_vertices%bs)) { 849 PetscBool is_blocked = PETSC_TRUE; 850 PetscInt *vary; 851 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 852 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 853 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 854 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 855 for (i=0; i<n_vertices; i++) { 856 if (vary[i]!=0 && vary[i]!=bs) { 857 is_blocked = PETSC_FALSE; 858 break; 859 } 860 } 861 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 862 vbs = bs; 863 for (i=0;i<n_R/vbs;i++) { 864 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 865 } 866 } 867 ierr = PetscFree(vary);CHKERRQ(ierr); 868 } 869 ierr = PetscFree(vertices);CHKERRQ(ierr); 870 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 871 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 872 873 /* print some info if requested */ 874 if (pcbddc->dbg_flag) { 875 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 876 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 878 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 880 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 881 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 882 } 883 884 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 885 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 886 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 887 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 888 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 889 for (i=0; i<n_D; i++) { 890 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 891 } 892 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 893 for (i=0, j=0; i<n_R; i++) { 894 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 895 aux_array1[j++] = i; 896 } 897 } 898 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 899 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 900 for (i=0, j=0; i<n_B; i++) { 901 if (!PetscBTLookup(bitmask,is_indices[i])) { 902 aux_array2[j++] = i; 903 } 904 } 905 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 906 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 907 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 908 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 909 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 910 911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 912 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 913 for (i=0, j=0; i<n_R; i++) { 914 if (PetscBTLookup(bitmask,idx_R_local[i])) { 915 aux_array1[j++] = i; 916 } 917 } 918 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 919 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 920 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 921 } 922 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 923 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } 926 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 930 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 931 { 932 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 933 PC_IS *pcis = (PC_IS*)pc->data; 934 PC pc_temp; 935 Mat A_RR; 936 Vec vec1,vec2,vec3; 937 MatStructure matstruct; 938 PetscScalar m_one = -1.0; 939 PetscReal value; 940 PetscInt n_D,n_R,ibs,mbs; 941 PetscBool use_exact,use_exact_reduced,issbaij; 942 PetscErrorCode ierr; 943 /* prefixes stuff */ 944 char dir_prefix[256],neu_prefix[256],str_level[3]; 945 size_t len; 946 947 PetscFunctionBegin; 948 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 949 950 /* compute prefixes */ 951 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 952 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 953 if (!pcbddc->current_level) { 954 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 955 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 956 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 957 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 958 } else { 959 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 960 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 961 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 962 len -= 15; /* remove "pc_bddc_coarse_" */ 963 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 964 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 965 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 966 *(dir_prefix+len)='\0'; 967 *(neu_prefix+len)='\0'; 968 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 969 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 970 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 971 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 972 } 973 974 /* DIRICHLET PROBLEM */ 975 /* Matrix for Dirichlet problem is pcis->A_II */ 976 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 977 if (!pcbddc->ksp_D) { /* create object if not yet build */ 978 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 979 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 980 /* default */ 981 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 982 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 983 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 984 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 985 if (issbaij) { 986 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 987 } else { 988 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 989 } 990 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 991 } 992 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 993 /* Allow user's customization */ 994 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 995 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 996 if (!n_D) { 997 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 998 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 999 } 1000 /* Set Up KSP for Dirichlet problem of BDDC */ 1001 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1002 /* set ksp_D into pcis data */ 1003 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1004 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1005 pcis->ksp_D = pcbddc->ksp_D; 1006 1007 /* NEUMANN PROBLEM */ 1008 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1009 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1010 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1011 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1012 if (ibs != mbs) { 1013 Mat newmat; 1014 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1015 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1016 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1017 } else { 1018 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1019 } 1020 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1021 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1022 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1023 /* default */ 1024 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1025 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1026 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1027 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1028 if (issbaij) { 1029 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1030 } else { 1031 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1032 } 1033 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1034 } 1035 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1036 /* Allow user's customization */ 1037 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1038 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1039 if (!n_R) { 1040 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1041 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1042 } 1043 /* Set Up KSP for Neumann problem of BDDC */ 1044 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1045 1046 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1047 1048 /* Dirichlet */ 1049 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1050 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1051 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1052 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1053 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1054 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1055 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1056 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1057 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1058 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1059 /* need to be adapted? */ 1060 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1061 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1062 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1063 /* print info */ 1064 if (pcbddc->dbg_flag) { 1065 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1070 } 1071 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1072 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1073 } 1074 1075 /* Neumann */ 1076 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1077 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1078 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1079 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1080 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1081 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1082 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1083 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1084 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1085 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1086 /* need to be adapted? */ 1087 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1088 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1089 /* print info */ 1090 if (pcbddc->dbg_flag) { 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1093 } 1094 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1095 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1096 } 1097 /* free Neumann problem's matrix */ 1098 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1104 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1105 { 1106 PetscErrorCode ierr; 1107 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1108 1109 PetscFunctionBegin; 1110 if (applytranspose) { 1111 if (pcbddc->local_auxmat1) { 1112 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1113 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1114 } 1115 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1116 } else { 1117 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1118 if (pcbddc->local_auxmat1) { 1119 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1120 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1121 } 1122 } 1123 PetscFunctionReturn(0); 1124 } 1125 1126 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1129 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1130 { 1131 PetscErrorCode ierr; 1132 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1133 PC_IS* pcis = (PC_IS*) (pc->data); 1134 const PetscScalar zero = 0.0; 1135 1136 PetscFunctionBegin; 1137 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1138 if (applytranspose) { 1139 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1140 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1141 } else { 1142 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1143 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1144 } 1145 /* Scatter data of coarse_rhs */ 1146 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1147 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1148 1149 /* Local solution on R nodes */ 1150 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1151 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1152 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1153 if (pcbddc->switch_static) { 1154 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1156 } 1157 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1158 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1159 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1160 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1161 if (pcbddc->switch_static) { 1162 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1163 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1164 } 1165 1166 /* Coarse solution */ 1167 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1168 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1169 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1170 } 1171 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1173 1174 /* Sum contributions from two levels */ 1175 if (applytranspose) { 1176 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1177 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1178 } else { 1179 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1180 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1187 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1188 { 1189 PetscErrorCode ierr; 1190 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1191 1192 PetscFunctionBegin; 1193 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1199 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1200 { 1201 PetscErrorCode ierr; 1202 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1203 1204 PetscFunctionBegin; 1205 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 /* uncomment for testing purposes */ 1210 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1213 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1214 { 1215 PetscErrorCode ierr; 1216 PC_IS* pcis = (PC_IS*)(pc->data); 1217 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1218 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1219 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1220 MatType impMatType=MATSEQAIJ; 1221 /* one and zero */ 1222 PetscScalar one=1.0,zero=0.0; 1223 /* space to store constraints and their local indices */ 1224 PetscScalar *temp_quadrature_constraint; 1225 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1226 /* iterators */ 1227 PetscInt i,j,k,total_counts,temp_start_ptr; 1228 /* stuff to store connected components stored in pcbddc->mat_graph */ 1229 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1230 PetscInt n_ISForFaces,n_ISForEdges; 1231 /* near null space stuff */ 1232 MatNullSpace nearnullsp; 1233 const Vec *nearnullvecs; 1234 Vec *localnearnullsp; 1235 PetscBool nnsp_has_cnst; 1236 PetscInt nnsp_size; 1237 PetscScalar *array; 1238 /* BLAS integers */ 1239 PetscBLASInt lwork,lierr; 1240 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1241 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1242 /* LAPACK working arrays for SVD or POD */ 1243 PetscBool skip_lapack; 1244 PetscScalar *work; 1245 PetscReal *singular_vals; 1246 #if defined(PETSC_USE_COMPLEX) 1247 PetscReal *rwork; 1248 #endif 1249 #if defined(PETSC_MISSING_LAPACK_GESVD) 1250 PetscBLASInt Blas_one_2=1; 1251 PetscScalar *temp_basis,*correlation_mat; 1252 #else 1253 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1254 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1255 #endif 1256 /* change of basis */ 1257 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1258 PetscBool boolforchange,qr_needed; 1259 PetscBT touched,change_basis,qr_needed_idx; 1260 /* auxiliary stuff */ 1261 PetscInt *nnz,*is_indices,*local_to_B; 1262 /* some quantities */ 1263 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1264 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1265 1266 1267 PetscFunctionBegin; 1268 /* Get index sets for faces, edges and vertices from graph */ 1269 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1270 pcbddc->use_vertices = PETSC_TRUE; 1271 } 1272 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1273 /* HACK: provide functions to set change of basis */ 1274 if (!ISForVertices && pcbddc->NullSpace) { 1275 pcbddc->use_change_of_basis = PETSC_TRUE; 1276 pcbddc->use_change_on_faces = PETSC_FALSE; 1277 } 1278 /* print some info */ 1279 if (pcbddc->dbg_flag) { 1280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1281 i = 0; 1282 if (ISForVertices) { 1283 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1284 } 1285 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1288 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1289 } 1290 /* check if near null space is attached to global mat */ 1291 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1292 if (nearnullsp) { 1293 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1294 } else { /* if near null space is not provided BDDC uses constants by default */ 1295 nnsp_size = 0; 1296 nnsp_has_cnst = PETSC_TRUE; 1297 } 1298 /* get max number of constraints on a single cc */ 1299 max_constraints = nnsp_size; 1300 if (nnsp_has_cnst) max_constraints++; 1301 1302 /* 1303 Evaluate maximum storage size needed by the procedure 1304 - temp_indices will contain start index of each constraint stored as follows 1305 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1306 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1307 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1308 */ 1309 total_counts = n_ISForFaces+n_ISForEdges; 1310 total_counts *= max_constraints; 1311 n_vertices = 0; 1312 if (ISForVertices) { 1313 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1314 } 1315 total_counts += n_vertices; 1316 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1317 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1318 total_counts = 0; 1319 max_size_of_constraint = 0; 1320 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1321 if (i<n_ISForEdges) { 1322 used_IS = &ISForEdges[i]; 1323 } else { 1324 used_IS = &ISForFaces[i-n_ISForEdges]; 1325 } 1326 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1327 total_counts += j; 1328 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1329 } 1330 total_counts *= max_constraints; 1331 total_counts += n_vertices; 1332 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1333 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1334 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1335 /* local to boundary numbering */ 1336 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1337 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1338 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1339 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1340 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1341 /* get local part of global near null space vectors */ 1342 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1343 for (k=0;k<nnsp_size;k++) { 1344 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1345 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1346 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1347 } 1348 1349 /* whether or not to skip lapack calls */ 1350 skip_lapack = PETSC_TRUE; 1351 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1352 1353 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1354 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1355 PetscScalar temp_work; 1356 #if defined(PETSC_MISSING_LAPACK_GESVD) 1357 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1358 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1359 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1360 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1361 #if defined(PETSC_USE_COMPLEX) 1362 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1363 #endif 1364 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1365 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1366 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1367 lwork = -1; 1368 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1369 #if !defined(PETSC_USE_COMPLEX) 1370 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1371 #else 1372 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1373 #endif 1374 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1375 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1376 #else /* on missing GESVD */ 1377 /* SVD */ 1378 PetscInt max_n,min_n; 1379 max_n = max_size_of_constraint; 1380 min_n = max_constraints; 1381 if (max_size_of_constraint < max_constraints) { 1382 min_n = max_size_of_constraint; 1383 max_n = max_constraints; 1384 } 1385 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1386 #if defined(PETSC_USE_COMPLEX) 1387 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1388 #endif 1389 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1390 lwork = -1; 1391 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1392 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1393 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1394 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1395 #if !defined(PETSC_USE_COMPLEX) 1396 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1397 #else 1398 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1399 #endif 1400 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1401 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1402 #endif /* on missing GESVD */ 1403 /* Allocate optimal workspace */ 1404 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1405 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1406 } 1407 /* Now we can loop on constraining sets */ 1408 total_counts = 0; 1409 temp_indices[0] = 0; 1410 /* vertices */ 1411 if (ISForVertices) { 1412 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1413 if (nnsp_has_cnst) { /* consider all vertices */ 1414 for (i=0;i<n_vertices;i++) { 1415 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1416 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1417 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1418 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1419 total_counts++; 1420 } 1421 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1422 PetscBool used_vertex; 1423 for (i=0;i<n_vertices;i++) { 1424 used_vertex = PETSC_FALSE; 1425 k = 0; 1426 while (!used_vertex && k<nnsp_size) { 1427 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1428 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1429 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1430 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1431 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1432 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1433 total_counts++; 1434 used_vertex = PETSC_TRUE; 1435 } 1436 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1437 k++; 1438 } 1439 } 1440 } 1441 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1442 n_vertices = total_counts; 1443 } 1444 1445 /* edges and faces */ 1446 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1447 if (i<n_ISForEdges) { 1448 used_IS = &ISForEdges[i]; 1449 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1450 } else { 1451 used_IS = &ISForFaces[i-n_ISForEdges]; 1452 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1453 } 1454 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1455 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1456 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1457 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1458 /* change of basis should not be performed on local periodic nodes */ 1459 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1460 if (nnsp_has_cnst) { 1461 PetscScalar quad_value; 1462 temp_constraints++; 1463 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1464 for (j=0;j<size_of_constraint;j++) { 1465 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1466 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1467 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1468 } 1469 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1470 total_counts++; 1471 } 1472 for (k=0;k<nnsp_size;k++) { 1473 PetscReal real_value; 1474 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1475 for (j=0;j<size_of_constraint;j++) { 1476 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1477 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1478 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1479 } 1480 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1481 /* check if array is null on the connected component */ 1482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1483 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1484 if (real_value > 0.0) { /* keep indices and values */ 1485 temp_constraints++; 1486 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1487 total_counts++; 1488 } 1489 } 1490 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1491 valid_constraints = temp_constraints; 1492 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1493 if (!pcbddc->use_nnsp_true && temp_constraints) { 1494 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1495 1496 #if defined(PETSC_MISSING_LAPACK_GESVD) 1497 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1498 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1499 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1500 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1501 from that computed using LAPACKgesvd 1502 -> This is due to a different computation of eigenvectors in LAPACKheev 1503 -> The quality of the POD-computed basis will be the same */ 1504 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1505 /* Store upper triangular part of correlation matrix */ 1506 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1507 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1508 for (j=0;j<temp_constraints;j++) { 1509 for (k=0;k<j+1;k++) { 1510 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1511 } 1512 } 1513 /* compute eigenvalues and eigenvectors of correlation matrix */ 1514 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1515 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1516 #if !defined(PETSC_USE_COMPLEX) 1517 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1518 #else 1519 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1520 #endif 1521 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1522 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1523 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1524 j=0; 1525 while (j < temp_constraints && singular_vals[j] < tol) j++; 1526 total_counts=total_counts-j; 1527 valid_constraints = temp_constraints-j; 1528 /* scale and copy POD basis into used quadrature memory */ 1529 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1530 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1531 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1533 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1535 if (j<temp_constraints) { 1536 PetscInt ii; 1537 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1538 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1539 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1540 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1541 for (k=0;k<temp_constraints-j;k++) { 1542 for (ii=0;ii<size_of_constraint;ii++) { 1543 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1544 } 1545 } 1546 } 1547 #else /* on missing GESVD */ 1548 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1549 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1550 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1552 #if !defined(PETSC_USE_COMPLEX) 1553 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1554 #else 1555 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1556 #endif 1557 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1558 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1559 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1560 k = temp_constraints; 1561 if (k > size_of_constraint) k = size_of_constraint; 1562 j = 0; 1563 while (j < k && singular_vals[k-j-1] < tol) j++; 1564 total_counts = total_counts-temp_constraints+k-j; 1565 valid_constraints = k-j; 1566 #endif /* on missing GESVD */ 1567 } 1568 /* setting change_of_basis flag is safe now */ 1569 if (boolforchange) { 1570 for (j=0;j<valid_constraints;j++) { 1571 PetscBTSet(change_basis,total_counts-j-1); 1572 } 1573 } 1574 } 1575 /* free index sets of faces, edges and vertices */ 1576 for (i=0;i<n_ISForFaces;i++) { 1577 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1578 } 1579 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1580 for (i=0;i<n_ISForEdges;i++) { 1581 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1582 } 1583 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1584 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1585 1586 /* free workspace */ 1587 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1588 ierr = PetscFree(work);CHKERRQ(ierr); 1589 #if defined(PETSC_USE_COMPLEX) 1590 ierr = PetscFree(rwork);CHKERRQ(ierr); 1591 #endif 1592 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1593 #if defined(PETSC_MISSING_LAPACK_GESVD) 1594 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1595 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1596 #endif 1597 } 1598 for (k=0;k<nnsp_size;k++) { 1599 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1600 } 1601 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1602 1603 /* set quantities in pcbddc data structure */ 1604 /* n_vertices defines the number of subdomain corners in the primal space */ 1605 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1606 pcbddc->local_primal_size = total_counts; 1607 pcbddc->n_vertices = n_vertices; 1608 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1609 1610 /* Create constraint matrix */ 1611 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1612 /* so we need to set it up properly either with or without change of basis */ 1613 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1614 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1615 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1616 /* array to compute a local numbering of constraints : vertices first then constraints */ 1617 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1618 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1619 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1620 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1621 /* auxiliary stuff for basis change */ 1622 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1623 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1624 1625 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1626 total_primal_vertices=0; 1627 for (i=0;i<pcbddc->local_primal_size;i++) { 1628 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1629 if (size_of_constraint == 1) { 1630 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1631 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1632 aux_primal_minloc[total_primal_vertices]=0; 1633 total_primal_vertices++; 1634 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1635 PetscInt min_loc,min_index; 1636 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1637 /* find first untouched local node */ 1638 k = 0; 1639 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1640 min_index = global_indices[k]; 1641 min_loc = k; 1642 /* search the minimum among global nodes already untouched on the cc */ 1643 for (k=1;k<size_of_constraint;k++) { 1644 /* there can be more than one constraint on a single connected component */ 1645 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1646 min_index = global_indices[k]; 1647 min_loc = k; 1648 } 1649 } 1650 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1651 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1652 aux_primal_minloc[total_primal_vertices]=min_loc; 1653 total_primal_vertices++; 1654 } 1655 } 1656 /* determine if a QR strategy is needed for change of basis */ 1657 qr_needed = PETSC_FALSE; 1658 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1659 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1660 if (PetscBTLookup(change_basis,i)) { 1661 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1662 j = 0; 1663 for (k=0;k<size_of_constraint;k++) { 1664 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1665 j++; 1666 } 1667 } 1668 /* found more than one primal dof on the cc */ 1669 if (j > 1) { 1670 PetscBTSet(qr_needed_idx,i); 1671 qr_needed = PETSC_TRUE; 1672 } 1673 } 1674 } 1675 /* free workspace */ 1676 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1677 1678 /* permute indices in order to have a sorted set of vertices */ 1679 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1680 1681 /* nonzero structure of constraint matrix */ 1682 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1683 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1684 j=total_primal_vertices; 1685 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1686 if (!PetscBTLookup(change_basis,i)) { 1687 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1688 j++; 1689 } 1690 } 1691 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1692 ierr = PetscFree(nnz);CHKERRQ(ierr); 1693 /* set values in constraint matrix */ 1694 for (i=0;i<total_primal_vertices;i++) { 1695 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1696 } 1697 total_counts = total_primal_vertices; 1698 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1699 if (!PetscBTLookup(change_basis,i)) { 1700 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1701 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1702 total_counts++; 1703 } 1704 } 1705 /* assembling */ 1706 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1707 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1708 /* 1709 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1710 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1711 */ 1712 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1713 if (pcbddc->use_change_of_basis) { 1714 /* dual and primal dofs on a single cc */ 1715 PetscInt dual_dofs,primal_dofs; 1716 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1717 PetscInt primal_counter; 1718 /* working stuff for GEQRF */ 1719 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1720 PetscBLASInt lqr_work; 1721 /* working stuff for UNGQR */ 1722 PetscScalar *gqr_work,lgqr_work_t; 1723 PetscBLASInt lgqr_work; 1724 /* working stuff for TRTRS */ 1725 PetscScalar *trs_rhs; 1726 PetscBLASInt Blas_NRHS; 1727 /* pointers for values insertion into change of basis matrix */ 1728 PetscInt *start_rows,*start_cols; 1729 PetscScalar *start_vals; 1730 /* working stuff for values insertion */ 1731 PetscBT is_primal; 1732 1733 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1734 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1735 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1736 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1737 /* work arrays */ 1738 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1739 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1740 /* nonzeros per row */ 1741 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1742 if (PetscBTLookup(change_basis,i)) { 1743 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1744 if (PetscBTLookup(qr_needed_idx,i)) { 1745 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1746 } else { 1747 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1748 /* get local primal index on the cc */ 1749 j = 0; 1750 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1751 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1752 } 1753 } 1754 } 1755 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1756 ierr = PetscFree(nnz);CHKERRQ(ierr); 1757 /* Set initial identity in the matrix */ 1758 for (i=0;i<pcis->n_B;i++) { 1759 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1760 } 1761 1762 if (pcbddc->dbg_flag) { 1763 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1764 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1765 } 1766 1767 1768 /* Now we loop on the constraints which need a change of basis */ 1769 /* 1770 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1771 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1772 1773 Basic blocks of change of basis matrix T computed 1774 1775 - Using the following block transformation if there is only a primal dof on the cc 1776 (in the example, primal dof is the last one of the edge in LOCAL ordering 1777 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1778 | 1 0 ... 0 1 | 1779 | 0 1 ... 0 1 | 1780 | ... | 1781 | 0 ... 1 1 | 1782 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1783 1784 - via QR decomposition of constraints otherwise 1785 */ 1786 if (qr_needed) { 1787 /* space to store Q */ 1788 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1789 /* first we issue queries for optimal work */ 1790 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1791 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1792 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1793 lqr_work = -1; 1794 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1795 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1796 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1797 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1798 lgqr_work = -1; 1799 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1800 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1801 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1802 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1803 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1804 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1805 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1806 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1807 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1808 /* array to store scaling factors for reflectors */ 1809 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1810 /* array to store rhs and solution of triangular solver */ 1811 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1812 /* allocating workspace for check */ 1813 if (pcbddc->dbg_flag) { 1814 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1815 } 1816 } 1817 /* array to store whether a node is primal or not */ 1818 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1819 for (i=0;i<total_primal_vertices;i++) { 1820 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1821 } 1822 1823 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1824 /* -> using implicit ordering contained in temp_indices data */ 1825 total_counts = pcbddc->n_vertices; 1826 primal_counter = total_counts; 1827 while (total_counts<pcbddc->local_primal_size) { 1828 primal_dofs = 1; 1829 if (PetscBTLookup(change_basis,total_counts)) { 1830 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1831 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1832 primal_dofs++; 1833 } 1834 /* get constraint info */ 1835 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1836 dual_dofs = size_of_constraint-primal_dofs; 1837 1838 if (pcbddc->dbg_flag) { 1839 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1840 } 1841 1842 if (primal_dofs > 1) { /* QR */ 1843 1844 /* copy quadrature constraints for change of basis check */ 1845 if (pcbddc->dbg_flag) { 1846 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1847 } 1848 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1849 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1850 1851 /* compute QR decomposition of constraints */ 1852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1853 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1854 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1855 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1856 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1857 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1858 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1859 1860 /* explictly compute R^-T */ 1861 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1862 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1863 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1864 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1865 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1867 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1868 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1869 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1870 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1871 1872 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1873 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1874 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1875 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1876 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1877 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1878 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1879 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1880 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1881 1882 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1883 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1884 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1885 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1886 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1887 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1888 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1889 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1890 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1891 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1892 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1893 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1894 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1895 1896 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1897 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1898 /* insert cols for primal dofs */ 1899 for (j=0;j<primal_dofs;j++) { 1900 start_vals = &qr_basis[j*size_of_constraint]; 1901 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1902 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1903 } 1904 /* insert cols for dual dofs */ 1905 for (j=0,k=0;j<dual_dofs;k++) { 1906 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1907 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1908 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1909 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1910 j++; 1911 } 1912 } 1913 1914 /* check change of basis */ 1915 if (pcbddc->dbg_flag) { 1916 PetscInt ii,jj; 1917 PetscBool valid_qr=PETSC_TRUE; 1918 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1919 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1920 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1921 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1922 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1923 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1924 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1925 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1926 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1927 for (jj=0;jj<size_of_constraint;jj++) { 1928 for (ii=0;ii<primal_dofs;ii++) { 1929 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1930 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1931 } 1932 } 1933 if (!valid_qr) { 1934 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1935 for (jj=0;jj<size_of_constraint;jj++) { 1936 for (ii=0;ii<primal_dofs;ii++) { 1937 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1938 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1939 } 1940 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1941 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1942 } 1943 } 1944 } 1945 } else { 1946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1947 } 1948 } 1949 } else { /* simple transformation block */ 1950 PetscInt row,col; 1951 PetscScalar val; 1952 for (j=0;j<size_of_constraint;j++) { 1953 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1954 if (!PetscBTLookup(is_primal,row)) { 1955 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1956 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1957 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1958 } else { 1959 for (k=0;k<size_of_constraint;k++) { 1960 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1961 if (row != col) { 1962 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1963 } else { 1964 val = 1.0; 1965 } 1966 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1967 } 1968 } 1969 } 1970 } 1971 /* increment primal counter */ 1972 primal_counter += primal_dofs; 1973 } else { 1974 if (pcbddc->dbg_flag) { 1975 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1976 } 1977 } 1978 /* increment constraint counter total_counts */ 1979 total_counts += primal_dofs; 1980 } 1981 1982 /* free workspace */ 1983 if (qr_needed) { 1984 if (pcbddc->dbg_flag) { 1985 ierr = PetscFree(work);CHKERRQ(ierr); 1986 } 1987 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1988 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1989 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1990 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1991 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1992 } 1993 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1994 /* assembling */ 1995 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1996 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1997 /* 1998 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1999 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2000 */ 2001 } 2002 2003 /* flush dbg viewer */ 2004 if (pcbddc->dbg_flag) { 2005 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2006 } 2007 2008 /* free workspace */ 2009 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2010 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2011 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2012 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2013 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2014 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2015 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2016 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2017 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2018 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2024 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2025 { 2026 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2027 PC_IS *pcis = (PC_IS*)pc->data; 2028 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2029 PetscInt bs,ierr,i,vertex_size; 2030 PetscViewer viewer=pcbddc->dbg_viewer; 2031 2032 PetscFunctionBegin; 2033 /* Init local Graph struct */ 2034 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2035 2036 /* Check validity of the csr graph passed in by the user */ 2037 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2038 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2039 } 2040 2041 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2042 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2043 Mat mat_adj; 2044 const PetscInt *xadj,*adjncy; 2045 PetscBool flg_row=PETSC_TRUE; 2046 2047 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2048 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2049 if (!flg_row) { 2050 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2051 } 2052 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2053 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2054 if (!flg_row) { 2055 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2056 } 2057 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2058 } 2059 2060 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2061 vertex_size = 1; 2062 if (!pcbddc->user_provided_isfordofs) { 2063 if (!pcbddc->n_ISForDofs) { 2064 IS *custom_ISForDofs; 2065 2066 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2067 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2068 for (i=0;i<bs;i++) { 2069 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2070 } 2071 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2072 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2073 /* remove my references to IS objects */ 2074 for (i=0;i<bs;i++) { 2075 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2076 } 2077 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2078 } 2079 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2080 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2081 } 2082 2083 /* Setup of Graph */ 2084 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2085 2086 /* Graph's connected components analysis */ 2087 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2088 2089 /* print some info to stdout */ 2090 if (pcbddc->dbg_flag) { 2091 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2092 } 2093 PetscFunctionReturn(0); 2094 } 2095 2096 #undef __FUNCT__ 2097 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2098 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2099 { 2100 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2101 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2102 PetscErrorCode ierr; 2103 2104 PetscFunctionBegin; 2105 n = 0; 2106 vertices = 0; 2107 if (pcbddc->ConstraintMatrix) { 2108 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2109 for (i=0;i<local_primal_size;i++) { 2110 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2111 if (size_of_constraint == 1) n++; 2112 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2113 } 2114 if (vertices_idx) { 2115 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2116 n = 0; 2117 for (i=0;i<local_primal_size;i++) { 2118 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2119 if (size_of_constraint == 1) { 2120 vertices[n++]=row_cmat_indices[0]; 2121 } 2122 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2123 } 2124 } 2125 } 2126 *n_vertices = n; 2127 if (vertices_idx) *vertices_idx = vertices; 2128 PetscFunctionReturn(0); 2129 } 2130 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2133 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2134 { 2135 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2136 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2137 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2138 PetscBT touched; 2139 PetscErrorCode ierr; 2140 2141 /* This function assumes that the number of local constraints per connected component 2142 is not greater than the number of nodes defined for the connected component 2143 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2144 PetscFunctionBegin; 2145 n = 0; 2146 constraints_index = 0; 2147 if (pcbddc->ConstraintMatrix) { 2148 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2149 max_size_of_constraint = 0; 2150 for (i=0;i<local_primal_size;i++) { 2151 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2152 if (size_of_constraint > 1) { 2153 n++; 2154 } 2155 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2156 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2157 } 2158 if (constraints_idx) { 2159 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2160 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2161 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2162 n = 0; 2163 for (i=0;i<local_primal_size;i++) { 2164 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2165 if (size_of_constraint > 1) { 2166 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2167 /* find first untouched local node */ 2168 j = 0; 2169 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2170 min_index = row_cmat_global_indices[j]; 2171 min_loc = j; 2172 /* search the minimum among nodes not yet touched on the connected component 2173 since there can be more than one constraint on a single cc */ 2174 for (j=1;j<size_of_constraint;j++) { 2175 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2176 min_index = row_cmat_global_indices[j]; 2177 min_loc = j; 2178 } 2179 } 2180 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2181 constraints_index[n++] = row_cmat_indices[min_loc]; 2182 } 2183 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2184 } 2185 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2186 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2187 } 2188 } 2189 *n_constraints = n; 2190 if (constraints_idx) *constraints_idx = constraints_index; 2191 PetscFunctionReturn(0); 2192 } 2193 2194 /* the next two functions has been adapted from pcis.c */ 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "PCBDDCApplySchur" 2197 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2198 { 2199 PetscErrorCode ierr; 2200 PC_IS *pcis = (PC_IS*)(pc->data); 2201 2202 PetscFunctionBegin; 2203 if (!vec2_B) { vec2_B = v; } 2204 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2205 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2206 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2207 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2208 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2214 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2215 { 2216 PetscErrorCode ierr; 2217 PC_IS *pcis = (PC_IS*)(pc->data); 2218 2219 PetscFunctionBegin; 2220 if (!vec2_B) { vec2_B = v; } 2221 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2222 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2223 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2224 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2225 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2226 PetscFunctionReturn(0); 2227 } 2228 2229 #undef __FUNCT__ 2230 #define __FUNCT__ "PCBDDCSubsetNumbering" 2231 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2232 { 2233 Vec local_vec,global_vec; 2234 IS seqis,paris; 2235 VecScatter scatter_ctx; 2236 PetscScalar *array; 2237 PetscInt *temp_global_dofs; 2238 PetscScalar globalsum; 2239 PetscInt i,j,s; 2240 PetscInt nlocals,first_index,old_index,max_local; 2241 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2242 PetscMPIInt *dof_sizes,*dof_displs; 2243 PetscBool first_found; 2244 PetscErrorCode ierr; 2245 2246 PetscFunctionBegin; 2247 /* mpi buffers */ 2248 MPI_Comm_size(comm,&size_prec_comm); 2249 MPI_Comm_rank(comm,&rank_prec_comm); 2250 j = ( !rank_prec_comm ? size_prec_comm : 0); 2251 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2252 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2253 /* get maximum size of subset */ 2254 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2255 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2256 max_local = 0; 2257 if (n_local_dofs) { 2258 max_local = temp_global_dofs[0]; 2259 for (i=1;i<n_local_dofs;i++) { 2260 if (max_local < temp_global_dofs[i] ) { 2261 max_local = temp_global_dofs[i]; 2262 } 2263 } 2264 } 2265 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2266 max_global++; 2267 max_local = 0; 2268 if (n_local_dofs) { 2269 max_local = local_dofs[0]; 2270 for (i=1;i<n_local_dofs;i++) { 2271 if (max_local < local_dofs[i] ) { 2272 max_local = local_dofs[i]; 2273 } 2274 } 2275 } 2276 max_local++; 2277 /* allocate workspace */ 2278 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2279 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2280 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2281 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2282 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2283 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2284 /* create scatter */ 2285 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2286 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2287 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2288 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2289 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2290 /* init array */ 2291 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2292 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2293 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2294 if (local_dofs_mult) { 2295 for (i=0;i<n_local_dofs;i++) { 2296 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2297 } 2298 } else { 2299 for (i=0;i<n_local_dofs;i++) { 2300 array[local_dofs[i]]=1.0; 2301 } 2302 } 2303 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2304 /* scatter into global vec and get total number of global dofs */ 2305 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2306 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2307 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2308 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2309 /* Fill global_vec with cumulative function for global numbering */ 2310 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2311 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2312 nlocals = 0; 2313 first_index = -1; 2314 first_found = PETSC_FALSE; 2315 for (i=0;i<s;i++) { 2316 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2317 first_found = PETSC_TRUE; 2318 first_index = i; 2319 } 2320 nlocals += (PetscInt)PetscRealPart(array[i]); 2321 } 2322 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2323 if (!rank_prec_comm) { 2324 dof_displs[0]=0; 2325 for (i=1;i<size_prec_comm;i++) { 2326 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2327 } 2328 } 2329 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2330 if (first_found) { 2331 array[first_index] += (PetscScalar)nlocals; 2332 old_index = first_index; 2333 for (i=first_index+1;i<s;i++) { 2334 if (PetscRealPart(array[i]) > 0.0) { 2335 array[i] += array[old_index]; 2336 old_index = i; 2337 } 2338 } 2339 } 2340 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2341 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2342 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2343 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2344 /* get global ordering of local dofs */ 2345 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2346 if (local_dofs_mult) { 2347 for (i=0;i<n_local_dofs;i++) { 2348 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2349 } 2350 } else { 2351 for (i=0;i<n_local_dofs;i++) { 2352 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2353 } 2354 } 2355 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2356 /* free workspace */ 2357 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2358 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2359 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2360 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2361 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2362 /* return pointer to global ordering of local dofs */ 2363 *global_numbering_subset = temp_global_dofs; 2364 PetscFunctionReturn(0); 2365 } 2366 2367 #undef __FUNCT__ 2368 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2369 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2370 { 2371 PetscInt i,j; 2372 PetscScalar *alphas; 2373 PetscErrorCode ierr; 2374 2375 PetscFunctionBegin; 2376 /* this implements stabilized Gram-Schmidt */ 2377 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2378 for (i=0;i<n;i++) { 2379 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2380 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2381 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2382 } 2383 ierr = PetscFree(alphas);CHKERRQ(ierr); 2384 PetscFunctionReturn(0); 2385 } 2386 2387 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatConvert_IS_AIJ" 2390 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2391 { 2392 Mat new_mat; 2393 Mat_IS *matis = (Mat_IS*)(mat->data); 2394 /* info on mat */ 2395 PetscInt bs,rows,cols; 2396 PetscInt lrows,lcols; 2397 PetscInt local_rows,local_cols; 2398 PetscMPIInt nsubdomains; 2399 /* preallocation */ 2400 Vec vec_dnz,vec_onz; 2401 PetscScalar *my_dnz,*my_onz,*array; 2402 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2403 PetscInt index_row,index_col,owner; 2404 PetscInt *local_indices,*global_indices; 2405 /* work */ 2406 PetscInt i,j; 2407 PetscErrorCode ierr; 2408 2409 PetscFunctionBegin; 2410 /* CHECKS */ 2411 /* get info from mat */ 2412 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2413 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2414 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2415 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2416 2417 /* MAT_INITIAL_MATRIX */ 2418 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2419 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2420 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2421 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2422 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2423 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2424 2425 /* 2426 preallocation 2427 */ 2428 2429 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2430 /* 2431 Some vectors are needed to sum up properly on shared interface dofs. 2432 Note that preallocation is not exact, since it overestimates nonzeros 2433 */ 2434 /* 2435 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2436 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2437 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2438 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2439 */ 2440 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2441 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2442 /* All processes need to compute entire row ownership */ 2443 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2444 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2445 for (i=0;i<nsubdomains;i++) { 2446 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2447 row_ownership[j]=i; 2448 } 2449 } 2450 /* map indices of local mat to global values */ 2451 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2452 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2453 for (i=0;i<local_rows;i++) local_indices[i]=i; 2454 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2455 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2456 2457 /* 2458 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2459 then, they will be summed up properly. This way, preallocation is always sufficient 2460 */ 2461 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2462 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2463 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2464 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2465 for (i=0;i<local_rows;i++) { 2466 index_row = global_indices[i]; 2467 for (j=i;j<local_rows;j++) { 2468 owner = row_ownership[index_row]; 2469 index_col = global_indices[j]; 2470 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2471 my_dnz[i] += 1.0; 2472 } else { /* offdiag block */ 2473 my_onz[i] += 1.0; 2474 } 2475 /* same as before, interchanging rows and cols */ 2476 if (i != j) { 2477 owner = row_ownership[index_col]; 2478 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2479 my_dnz[j] += 1.0; 2480 } else { 2481 my_onz[j] += 1.0; 2482 } 2483 } 2484 } 2485 } 2486 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2487 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2488 if (local_rows) { /* multilevel guard */ 2489 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2490 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2491 } 2492 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2493 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2494 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2495 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2496 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2497 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2498 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2499 2500 /* set computed preallocation in dnz and onz */ 2501 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2502 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2503 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2504 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2505 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2506 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2507 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2508 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2509 2510 /* Resize preallocation if overestimated */ 2511 for (i=0;i<lrows;i++) { 2512 dnz[i] = PetscMin(dnz[i],lcols); 2513 onz[i] = PetscMin(onz[i],cols-lcols); 2514 } 2515 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2516 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2517 2518 /* 2519 Set values. Very Basic. 2520 */ 2521 for (i=0;i<local_rows;i++) { 2522 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2523 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2524 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2525 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2526 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2527 } 2528 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2529 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2530 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2531 2532 /* get back mat */ 2533 *M = new_mat; 2534 PetscFunctionReturn(0); 2535 } 2536 2537 #undef __FUNCT__ 2538 #define __FUNCT__ "MatISSubassemble_Private" 2539 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2540 { 2541 Mat subdomain_adj; 2542 IS new_ranks,ranks_send_to; 2543 MatPartitioning partitioner; 2544 Mat_IS *matis; 2545 PetscInt n_neighs,*neighs,*n_shared,**shared; 2546 PetscInt prank; 2547 PetscMPIInt size,rank,color; 2548 PetscInt *xadj,*adjncy,*oldranks; 2549 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2550 PetscInt i,j,n_subdomains,local_size,threshold=0; 2551 PetscErrorCode ierr; 2552 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2553 PetscSubcomm subcomm; 2554 2555 PetscFunctionBegin; 2556 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2557 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2558 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2559 2560 /* Get info on mapping */ 2561 matis = (Mat_IS*)(mat->data); 2562 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2563 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2564 2565 /* build local CSR graph of subdomains' connectivity */ 2566 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2567 xadj[0] = 0; 2568 xadj[1] = PetscMax(n_neighs-1,0); 2569 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2570 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2571 2572 if (threshold) { 2573 PetscInt* count,min_threshold; 2574 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2575 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2576 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2577 for (j=0;j<n_shared[i];j++) { 2578 count[shared[i][j]] += 1; 2579 } 2580 } 2581 /* adapt threshold since we dont want to lose significant connections */ 2582 min_threshold = n_neighs; 2583 for (i=1;i<n_neighs;i++) { 2584 for (j=0;j<n_shared[i];j++) { 2585 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2586 } 2587 } 2588 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2589 threshold = PetscMax(min_threshold+1,threshold); 2590 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2591 xadj[1] = 0; 2592 for (i=1;i<n_neighs;i++) { 2593 for (j=0;j<n_shared[i];j++) { 2594 if (count[shared[i][j]] < threshold) { 2595 adjncy[xadj[1]] = neighs[i]; 2596 adjncy_wgt[xadj[1]] = n_shared[i]; 2597 xadj[1]++; 2598 break; 2599 } 2600 } 2601 } 2602 ierr = PetscFree(count);CHKERRQ(ierr); 2603 } else { 2604 if (xadj[1]) { 2605 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2606 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2607 } 2608 } 2609 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2610 if (use_square) { 2611 for (i=0;i<xadj[1];i++) { 2612 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2613 } 2614 } 2615 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2616 2617 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2618 2619 /* 2620 Restrict work on active processes only. 2621 */ 2622 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2623 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2624 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2625 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2626 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2627 if (color) { 2628 ierr = PetscFree(xadj);CHKERRQ(ierr); 2629 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2630 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2631 } else { 2632 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2633 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2634 prank = rank; 2635 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2636 for (i=0;i<size;i++) { 2637 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2638 } 2639 for (i=0;i<xadj[1];i++) { 2640 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2641 } 2642 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2643 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2644 n_subdomains = (PetscInt)size; 2645 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2646 2647 /* Partition */ 2648 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2649 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2650 if (use_vwgt) { 2651 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2652 v_wgt[0] = local_size; 2653 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2654 } 2655 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2656 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2657 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2658 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2659 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2660 2661 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2662 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2663 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2664 /* clean up */ 2665 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2666 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2667 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2668 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2669 } 2670 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2671 2672 /* assemble parallel IS for sends */ 2673 i = 1; 2674 if (color) i=0; 2675 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2676 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2677 2678 /* get back IS */ 2679 *is_sends = ranks_send_to; 2680 PetscFunctionReturn(0); 2681 } 2682 2683 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2684 2685 #undef __FUNCT__ 2686 #define __FUNCT__ "MatISSubassemble" 2687 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2688 { 2689 Mat local_mat,new_mat; 2690 Mat_IS *matis; 2691 IS is_sends_internal; 2692 PetscInt rows,cols; 2693 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2694 PetscBool ismatis,isdense; 2695 ISLocalToGlobalMapping l2gmap; 2696 PetscInt* l2gmap_indices; 2697 const PetscInt* is_indices; 2698 MatType new_local_type; 2699 MatTypePrivate new_local_type_private; 2700 /* buffers */ 2701 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2702 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2703 /* MPI */ 2704 MPI_Comm comm; 2705 PetscMPIInt n_sends,n_recvs,commsize; 2706 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2707 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2708 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2709 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2710 PetscErrorCode ierr; 2711 2712 PetscFunctionBegin; 2713 /* checks */ 2714 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2715 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2716 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2717 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2718 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2719 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2720 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2721 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2722 PetscValidLogicalCollectiveInt(mat,bs,0); 2723 /* prepare IS for sending if not provided */ 2724 if (!is_sends) { 2725 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2726 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2727 } else { 2728 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2729 is_sends_internal = is_sends; 2730 } 2731 2732 /* get pointer of MATIS data */ 2733 matis = (Mat_IS*)mat->data; 2734 2735 /* get comm */ 2736 comm = PetscObjectComm((PetscObject)mat); 2737 2738 /* compute number of sends */ 2739 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2740 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2741 2742 /* compute number of receives */ 2743 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2744 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2745 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2746 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2747 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2748 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2749 ierr = PetscFree(iflags);CHKERRQ(ierr); 2750 2751 /* prepare send/receive buffers */ 2752 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2753 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2754 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2755 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2756 2757 /* Get data from local mat */ 2758 if (!isdense) { 2759 /* TODO: See below some guidelines on how to prepare the local buffers */ 2760 /* 2761 send_buffer_vals should contain the raw values of the local matrix 2762 send_buffer_idxs should contain: 2763 - MatType_PRIVATE type 2764 - PetscInt size_of_l2gmap 2765 - PetscInt global_row_indices[size_of_l2gmap] 2766 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2767 */ 2768 } else { 2769 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2770 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2771 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2772 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2773 send_buffer_idxs[1] = i; 2774 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2775 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2776 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2777 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2778 for (i=0;i<n_sends;i++) { 2779 ilengths_vals[is_indices[i]] = len*len; 2780 ilengths_idxs[is_indices[i]] = len+2; 2781 } 2782 } 2783 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2784 buf_size_idxs = 0; 2785 buf_size_vals = 0; 2786 for (i=0;i<n_recvs;i++) { 2787 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2788 buf_size_vals += (PetscInt)olengths_vals[i]; 2789 } 2790 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2791 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2792 2793 /* get new tags for clean communications */ 2794 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2795 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2796 2797 /* allocate for requests */ 2798 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2799 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2800 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2801 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2802 2803 /* communications */ 2804 ptr_idxs = recv_buffer_idxs; 2805 ptr_vals = recv_buffer_vals; 2806 for (i=0;i<n_recvs;i++) { 2807 source_dest = onodes[i]; 2808 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2809 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2810 ptr_idxs += olengths_idxs[i]; 2811 ptr_vals += olengths_vals[i]; 2812 } 2813 for (i=0;i<n_sends;i++) { 2814 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2815 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2816 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2817 } 2818 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2819 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2820 2821 /* assemble new l2g map */ 2822 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2823 ptr_idxs = recv_buffer_idxs; 2824 buf_size_idxs = 0; 2825 for (i=0;i<n_recvs;i++) { 2826 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2827 ptr_idxs += olengths_idxs[i]; 2828 } 2829 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2830 ptr_idxs = recv_buffer_idxs; 2831 buf_size_idxs = 0; 2832 for (i=0;i<n_recvs;i++) { 2833 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2834 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2835 ptr_idxs += olengths_idxs[i]; 2836 } 2837 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2838 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2839 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2840 2841 /* infer new local matrix type from received local matrices type */ 2842 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2843 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2844 new_local_type_private = MATAIJ_PRIVATE; 2845 new_local_type = MATSEQAIJ; 2846 if (n_recvs) { 2847 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2848 ptr_idxs = recv_buffer_idxs; 2849 for (i=0;i<n_recvs;i++) { 2850 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2851 new_local_type_private = MATAIJ_PRIVATE; 2852 break; 2853 } 2854 ptr_idxs += olengths_idxs[i]; 2855 } 2856 switch (new_local_type_private) { 2857 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2858 new_local_type = MATSEQAIJ; 2859 bs = 1; 2860 break; 2861 case MATAIJ_PRIVATE: 2862 new_local_type = MATSEQAIJ; 2863 bs = 1; 2864 break; 2865 case MATBAIJ_PRIVATE: 2866 new_local_type = MATSEQBAIJ; 2867 break; 2868 case MATSBAIJ_PRIVATE: 2869 new_local_type = MATSEQSBAIJ; 2870 break; 2871 default: 2872 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2873 break; 2874 } 2875 } 2876 2877 /* create MATIS object */ 2878 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2879 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2880 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2881 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2882 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2883 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2884 2885 /* set values */ 2886 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2887 ptr_vals = recv_buffer_vals; 2888 ptr_idxs = recv_buffer_idxs; 2889 for (i=0;i<n_recvs;i++) { 2890 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2891 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2892 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2893 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2894 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2895 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2896 } 2897 ptr_idxs += olengths_idxs[i]; 2898 ptr_vals += olengths_vals[i]; 2899 } 2900 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2901 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2902 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2903 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2904 2905 { /* check */ 2906 Vec lvec,rvec; 2907 PetscReal infty_error; 2908 2909 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2910 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2911 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2912 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2913 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2914 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2915 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2916 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2917 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2918 } 2919 2920 /* free workspace */ 2921 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2922 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2923 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2924 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2925 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2926 if (isdense) { 2927 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2928 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2929 } else { 2930 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2931 } 2932 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2933 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2934 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2935 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2936 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2937 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2938 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2939 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2940 ierr = PetscFree(onodes);CHKERRQ(ierr); 2941 /* get back new mat */ 2942 *mat_n = new_mat; 2943 PetscFunctionReturn(0); 2944 } 2945 2946 #undef __FUNCT__ 2947 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2948 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2949 { 2950 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2951 PC_IS *pcis = (PC_IS*)pc->data; 2952 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2953 MatNullSpace CoarseNullSpace=NULL; 2954 ISLocalToGlobalMapping coarse_islg; 2955 IS coarse_is; 2956 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2957 PetscInt im_active=-1,active_procs=-1; 2958 PC pc_temp; 2959 PCType coarse_pc_type; 2960 KSPType coarse_ksp_type; 2961 PetscBool multilevel_requested,multilevel_allowed; 2962 PetscBool setsym,issym,isbddc,isnn; 2963 MatStructure matstruct; 2964 PetscErrorCode ierr; 2965 2966 PetscFunctionBegin; 2967 /* Assign global numbering to coarse dofs */ 2968 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2969 2970 /* infer some info from user */ 2971 issym = PETSC_FALSE; 2972 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2973 multilevel_allowed = PETSC_FALSE; 2974 multilevel_requested = PETSC_FALSE; 2975 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2976 if (multilevel_requested) { 2977 /* count "active processes" */ 2978 im_active = !!(pcis->n); 2979 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2980 if (active_procs/pcbddc->coarsening_ratio < 2) { 2981 multilevel_allowed = PETSC_FALSE; 2982 } else { 2983 multilevel_allowed = PETSC_TRUE; 2984 } 2985 } 2986 2987 /* set defaults for coarse KSP and PC */ 2988 if (multilevel_allowed) { 2989 if (issym) { 2990 coarse_ksp_type = KSPRICHARDSON; 2991 } else { 2992 coarse_ksp_type = KSPCHEBYSHEV; 2993 } 2994 coarse_pc_type = PCBDDC; 2995 } else { 2996 coarse_ksp_type = KSPPREONLY; 2997 coarse_pc_type = PCREDUNDANT; 2998 } 2999 3000 /* create the coarse KSP object only once with defaults */ 3001 if (!pcbddc->coarse_ksp) { 3002 char prefix[256],str_level[3]; 3003 size_t len; 3004 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3005 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3006 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3007 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3008 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3009 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3010 /* prefix */ 3011 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3012 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3013 if (!pcbddc->current_level) { 3014 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3015 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3016 } else { 3017 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3018 if (pcbddc->current_level>1) len -= 2; 3019 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3020 *(prefix+len)='\0'; 3021 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3022 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3023 } 3024 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3025 } 3026 /* allow user customization */ 3027 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3028 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3029 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3030 3031 /* get some info after set from options */ 3032 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3033 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3034 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3035 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3036 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3037 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3038 isbddc = PETSC_FALSE; 3039 } 3040 3041 /* propagate BDDC info to the next level */ 3042 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3043 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3044 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3045 3046 /* creates temporary MATIS object for coarse matrix */ 3047 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3048 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3049 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3050 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3051 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3052 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3053 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3054 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3055 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3056 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3057 3058 /* assemble coarse matrix */ 3059 if (isbddc || isnn) { 3060 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3061 } else { 3062 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3063 } 3064 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3065 3066 /* create local to global scatters for coarse problem */ 3067 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3068 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3069 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3070 3071 /* propagate symmetry info to coarse matrix */ 3072 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3073 3074 /* Compute coarse null space (special handling by BDDC only) */ 3075 if (pcbddc->NullSpace) { 3076 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3077 if (isbddc) { 3078 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3079 } else { 3080 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3081 } 3082 } 3083 3084 /* set operators */ 3085 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3086 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3087 3088 /* additional KSP customization */ 3089 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3090 if (max_it < 5) { 3091 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3092 } 3093 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3094 3095 3096 /* print some info if requested */ 3097 if (pcbddc->dbg_flag) { 3098 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3099 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3100 if (!multilevel_allowed) { 3101 if (multilevel_requested) { 3102 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3103 } else if (pcbddc->max_levels) { 3104 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3105 } 3106 } 3107 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3108 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3109 } 3110 3111 /* setup coarse ksp */ 3112 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3113 if (pcbddc->dbg_flag) { 3114 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3115 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3116 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3117 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3118 } 3119 3120 /* Check coarse problem if requested */ 3121 if (pcbddc->dbg_flag) { 3122 KSP check_ksp; 3123 KSPType check_ksp_type; 3124 PC check_pc; 3125 Vec check_vec; 3126 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3127 PetscInt its; 3128 PetscBool ispreonly,compute; 3129 3130 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3131 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3132 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3133 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3134 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3135 if (ispreonly) { 3136 check_ksp_type = KSPPREONLY; 3137 compute = PETSC_FALSE; 3138 } else { 3139 if (issym) check_ksp_type = KSPCG; 3140 else check_ksp_type = KSPGMRES; 3141 compute = PETSC_TRUE; 3142 } 3143 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3144 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3145 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3146 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3147 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3148 /* create random vec */ 3149 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3150 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3151 if (CoarseNullSpace) { 3152 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3153 } 3154 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3155 /* solve coarse problem */ 3156 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3157 if (CoarseNullSpace) { 3158 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3159 } 3160 /* check coarse problem residual error */ 3161 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3162 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3163 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3164 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3165 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3166 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3168 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3169 /* get eigenvalue estimation if preonly has not been requested */ 3170 if (!ispreonly) { 3171 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3172 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3173 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3174 } 3175 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3176 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3177 } 3178 /* free memory */ 3179 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3180 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3181 PetscFunctionReturn(0); 3182 } 3183 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3186 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3187 { 3188 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3189 PC_IS* pcis = (PC_IS*)pc->data; 3190 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3191 PetscInt i,j,coarse_size; 3192 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3193 PetscErrorCode ierr; 3194 3195 PetscFunctionBegin; 3196 /* get indices in local ordering for vertices and constraints */ 3197 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3198 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3199 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3200 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3201 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3202 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3203 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3204 3205 /* Compute global number of coarse dofs */ 3206 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3207 3208 /* check numbering */ 3209 if (pcbddc->dbg_flag) { 3210 PetscScalar coarsesum,*array; 3211 3212 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3213 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3214 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3215 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3216 for (i=0;i<pcbddc->local_primal_size;i++) { 3217 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3218 } 3219 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3220 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3221 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3222 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3223 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3224 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3225 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3226 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3227 for (i=0;i<pcis->n;i++) { 3228 if (array[i] == 1.0) { 3229 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3230 } 3231 } 3232 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3233 for (i=0;i<pcis->n;i++) { 3234 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3235 } 3236 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3237 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3238 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3239 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3240 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3241 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3242 if (pcbddc->dbg_flag > 1) { 3243 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3244 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3245 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3246 for (i=0;i<pcbddc->local_primal_size;i++) { 3247 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3248 } 3249 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3250 } 3251 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3252 } 3253 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3254 /* get back data */ 3255 *coarse_size_n = coarse_size; 3256 *local_primal_indices_n = local_primal_indices; 3257 PetscFunctionReturn(0); 3258 } 3259 3260