1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 pcbddc->n_ISForDofs = 0; 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "PCBDDCCreateWorkVectors" 111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 112 { 113 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 114 PC_IS *pcis = (PC_IS*)pc->data; 115 VecType impVecType; 116 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 117 PetscErrorCode ierr; 118 119 PetscFunctionBegin; 120 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 121 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 122 local_primal_size = n_constraints+n_vertices; 123 n_R = pcis->n-n_vertices; 124 /* local work vectors */ 125 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 126 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 127 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 128 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 129 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 130 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 131 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 132 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 133 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 134 if (n_constraints) { 135 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 136 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 137 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetUpCorrection" 144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 145 { 146 PetscErrorCode ierr; 147 /* pointers to pcis and pcbddc */ 148 PC_IS* pcis = (PC_IS*)pc->data; 149 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 150 /* submatrices of local problem */ 151 Mat A_RV,A_VR,A_VV; 152 /* working matrices */ 153 Mat M1,M2,M3,C_CR; 154 /* working vectors */ 155 Vec vec1_C,vec2_C,vec1_V,vec2_V; 156 /* additional working stuff */ 157 IS is_aux; 158 ISLocalToGlobalMapping BtoNmap; 159 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 160 const PetscScalar *array,*row_cmat_values; 161 const PetscInt *row_cmat_indices,*idx_R_local; 162 PetscInt *vertices,*idx_V_B,*auxindices; 163 PetscInt n_vertices,n_constraints,size_of_constraint; 164 PetscInt i,j,n_R,n_D,n_B; 165 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 166 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 184 /* Allocating some extra storage just to be safe */ 185 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 186 for (i=0;i<pcis->n;i++) auxindices[i]=i; 187 188 /* vertices in boundary numbering */ 189 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 190 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 191 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 193 if (i != n_vertices) { 194 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 195 } 196 197 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 198 if (n_constraints) { 199 /* work vectors */ 200 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 201 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 202 /* auxiliary matrices */ 203 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 204 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 205 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 206 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 207 208 /* Extract constraints on R nodes: C_{CR} */ 209 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 210 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 211 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 212 213 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 214 for (i=0;i<n_constraints;i++) { 215 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 216 /* Get row of constraint matrix in R numbering */ 217 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 218 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 219 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 220 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 221 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 222 /* Solve for row of constraint matrix in R numbering */ 223 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 224 /* Set values in local_auxmat2 */ 225 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 226 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 227 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 228 } 229 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 230 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 231 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 232 233 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 234 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 235 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 236 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 237 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 238 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 239 ierr = MatSetUp(M1);CHKERRQ(ierr); 240 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 241 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 242 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 243 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 244 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 245 ierr = MatDestroy(&M2);CHKERRQ(ierr); 246 ierr = MatDestroy(&M3);CHKERRQ(ierr); 247 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 248 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 249 } 250 251 /* Get submatrices from subdomain matrix */ 252 if (n_vertices) { 253 PetscInt ibs,mbs; 254 PetscBool issbaij; 255 Mat newmat; 256 257 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 258 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 259 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 260 if (ibs != mbs) { /* need to convert to SEQAIJ */ 261 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 262 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 263 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 265 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 266 } else { 267 /* this is safe */ 268 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 269 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 270 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 271 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 272 /* which of the two approaches is faster? */ 273 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 274 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 275 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 276 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 277 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 278 } else { 279 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 280 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 281 } 282 } 283 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 284 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 285 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 286 } 287 288 /* Matrix of coarse basis functions (local) */ 289 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 290 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 291 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 292 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 293 if (pcbddc->switch_static || pcbddc->dbg_flag) { 294 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 295 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 296 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 297 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 298 } 299 300 if (pcbddc->dbg_flag) { 301 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 302 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 303 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 304 } 305 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 306 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 307 308 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 309 310 /* vertices */ 311 for (i=0;i<n_vertices;i++) { 312 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 313 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 314 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 315 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 316 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 317 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 318 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 319 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 320 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 321 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 322 if (n_constraints) { 323 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 324 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 325 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 326 } 327 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 328 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 329 330 /* Set values in coarse basis function and subdomain part of coarse_mat */ 331 /* coarse basis functions */ 332 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 333 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 334 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 335 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 336 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 337 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 338 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 339 if (pcbddc->switch_static || pcbddc->dbg_flag) { 340 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 341 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 342 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 343 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 344 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 345 } 346 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 347 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 348 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 if (n_constraints) { 351 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 352 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 } 355 356 /* check */ 357 if (pcbddc->dbg_flag) { 358 /* assemble subdomain vector on local nodes */ 359 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 360 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 361 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 362 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 363 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 364 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 365 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 366 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 367 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 368 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 369 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 370 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 371 if (n_constraints) { 372 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 373 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 374 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 375 } 376 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 378 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 379 /* check saddle point solution */ 380 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 381 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 382 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 383 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 384 /* shift by the identity matrix */ 385 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 386 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 387 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 388 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 389 } 390 } 391 392 /* constraints */ 393 for (i=0;i<n_constraints;i++) { 394 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 395 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 396 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 397 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 398 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 399 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 400 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 401 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 402 if (n_vertices) { 403 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 404 } 405 /* Set values in coarse basis function and subdomain part of coarse_mat */ 406 /* coarse basis functions */ 407 j = i+n_vertices; /* don't touch this! */ 408 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 409 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 410 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 411 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 412 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 414 if (pcbddc->switch_static || pcbddc->dbg_flag) { 415 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 416 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 417 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 418 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 420 } 421 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 422 if (n_vertices) { 423 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 424 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 426 } 427 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 428 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 430 431 if (pcbddc->dbg_flag) { 432 /* assemble subdomain vector on nodes */ 433 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 434 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 437 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 439 /* assemble subdomain vector of lagrange multipliers */ 440 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 441 if (n_vertices) { 442 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 443 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 444 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 445 } 446 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 447 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 448 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 449 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 452 /* check saddle point solution */ 453 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 454 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 456 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 457 /* shift by the identity matrix */ 458 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 459 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 460 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 461 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 462 } 463 } 464 /* call assembling routines for local coarse basis */ 465 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 if (pcbddc->switch_static || pcbddc->dbg_flag) { 468 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 469 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 470 } 471 472 /* compute other basis functions for non-symmetric problems */ 473 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 474 if (!setsym || (setsym && !issym)) { 475 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 476 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 477 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 478 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 481 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 482 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 483 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 484 } 485 for (i=0;i<pcbddc->local_primal_size;i++) { 486 if (n_constraints) { 487 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 488 for (j=0;j<n_constraints;j++) { 489 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 490 } 491 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 492 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 493 } 494 if (i<n_vertices) { 495 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 496 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 499 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 500 if (n_constraints) { 501 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 502 } 503 } else { 504 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 505 } 506 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 507 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 508 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 509 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 510 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 511 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 512 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 513 if (i<n_vertices) { 514 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 515 } 516 if (pcbddc->switch_static || pcbddc->dbg_flag) { 517 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 518 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 519 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 520 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 521 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 522 } 523 524 if (pcbddc->dbg_flag) { 525 /* assemble subdomain vector on nodes */ 526 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 527 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 528 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 529 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 530 if (i<n_vertices) { 531 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 532 } 533 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 534 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 535 /* assemble subdomain vector of lagrange multipliers */ 536 for (j=0;j<pcbddc->local_primal_size;j++) { 537 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 538 } 539 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 540 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 541 /* check saddle point solution */ 542 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 543 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 544 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 545 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 546 /* shift by the identity matrix */ 547 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 548 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 549 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 550 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 551 } 552 } 553 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 if (pcbddc->switch_static || pcbddc->dbg_flag) { 556 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 } 559 unsymmetric_check = PETSC_TRUE; 560 } else { /* take references to already computed coarse basis */ 561 unsymmetric_check = PETSC_FALSE; 562 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 563 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 564 if (pcbddc->coarse_phi_D) { 565 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 566 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 567 } 568 } 569 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 570 /* Checking coarse_sub_mat and coarse basis functios */ 571 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 572 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 573 if (pcbddc->dbg_flag) { 574 Mat coarse_sub_mat; 575 Mat AUXMAT,TM1,TM2,TM3,TM4; 576 Mat coarse_phi_D,coarse_phi_B; 577 Mat coarse_psi_D,coarse_psi_B; 578 Mat A_II,A_BB,A_IB,A_BI; 579 MatType checkmattype=MATSEQAIJ; 580 PetscReal real_value; 581 582 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 583 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 584 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 585 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 586 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 587 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 588 if (unsymmetric_check) { 589 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 590 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 591 } 592 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 593 594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 596 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 597 if (unsymmetric_check) { 598 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 599 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 600 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 601 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 602 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 603 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 604 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 605 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 607 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 608 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 609 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 610 } else { 611 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 612 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 613 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 614 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 615 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 616 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 617 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 618 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 619 } 620 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 621 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 622 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 623 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 624 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 625 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 630 for (i=0;i<pcbddc->local_primal_size;i++) { 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 632 } 633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 634 for (i=0;i<pcbddc->local_primal_size;i++) { 635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 636 } 637 if (unsymmetric_check) { 638 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 639 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 641 } 642 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 643 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 644 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 645 } 646 } 647 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 648 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 649 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 650 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 651 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 652 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 653 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 654 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 655 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 656 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 657 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 658 if (unsymmetric_check) { 659 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 660 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 661 } 662 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 663 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 664 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 665 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 666 } 667 /* free memory */ 668 if (n_vertices) { 669 ierr = PetscFree(vertices);CHKERRQ(ierr); 670 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 671 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 672 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 673 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 674 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 675 } 676 if (n_constraints) { 677 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 678 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 679 ierr = MatDestroy(&M1);CHKERRQ(ierr); 680 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 681 } 682 ierr = PetscFree(auxindices);CHKERRQ(ierr); 683 /* get back data */ 684 *coarse_submat_vals_n = coarse_submat_vals; 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 690 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 691 { 692 PC_IS* pcis = (PC_IS*)(pc->data); 693 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 694 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 695 PetscBool issbaij,isbaij; 696 /* manage repeated solves */ 697 MatReuse reuse; 698 MatStructure matstruct; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 /* get mat flags */ 703 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 704 reuse = MAT_INITIAL_MATRIX; 705 if (pc->setupcalled) { 706 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 707 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 708 if (matstruct == SAME_NONZERO_PATTERN) { 709 reuse = MAT_REUSE_MATRIX; 710 } else { 711 reuse = MAT_INITIAL_MATRIX; 712 } 713 } 714 if (reuse == MAT_INITIAL_MATRIX) { 715 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 717 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 720 } 721 722 /* transform local matrices if needed */ 723 if (pcbddc->use_change_of_basis) { 724 Mat change_mat_all; 725 PetscScalar *row_cmat_values; 726 PetscInt *row_cmat_indices; 727 PetscInt *nnz,*is_indices,*temp_indices; 728 PetscInt i,j,k,n_D,n_B; 729 730 /* Get Non-overlapping dimensions */ 731 n_B = pcis->n_B; 732 n_D = pcis->n-n_B; 733 734 /* compute nonzero structure of change of basis on all local nodes */ 735 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 736 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 737 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 738 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 739 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 740 k=1; 741 for (i=0;i<n_B;i++) { 742 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 743 nnz[is_indices[i]]=j; 744 if (k < j) k = j; 745 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 746 } 747 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 748 /* assemble change of basis matrix on the whole set of local dofs */ 749 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 750 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 751 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 752 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 753 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 754 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 755 for (i=0;i<n_D;i++) { 756 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 757 } 758 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 759 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 760 for (i=0;i<n_B;i++) { 761 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 762 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 763 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 764 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 765 } 766 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 768 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 769 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 770 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 771 if (!issbaij && !isbaij) { 772 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 773 } else { 774 Mat work_mat; 775 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 776 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 777 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 778 } 779 /* 780 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 781 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 782 */ 783 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 784 ierr = PetscFree(nnz);CHKERRQ(ierr); 785 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 786 } else { 787 /* without change of basis, the local matrix is unchanged */ 788 if (!pcbddc->local_mat) { 789 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 790 pcbddc->local_mat = matis->A; 791 } 792 } 793 794 /* get submatrices */ 795 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 796 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 797 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 798 if (!issbaij) { 799 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 800 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 801 } else { 802 Mat newmat; 803 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 804 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 805 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 806 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 807 } 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 813 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 814 { 815 PC_IS* pcis = (PC_IS*)(pc->data); 816 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 817 IS is_aux1,is_aux2; 818 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 819 PetscInt n_vertices,i,j,n_R,n_D,n_B; 820 PetscInt vbs,bs; 821 PetscBT bitmask; 822 PetscErrorCode ierr; 823 824 PetscFunctionBegin; 825 /* Set Non-overlapping dimensions */ 826 n_B = pcis->n_B; n_D = pcis->n - n_B; 827 /* get vertex indices from constraint matrix */ 828 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 829 830 /* create auxiliary bitmask */ 831 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 832 for (i=0;i<n_vertices;i++) { 833 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 834 } 835 836 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 837 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 838 for (i=0, n_R=0; i<pcis->n; i++) { 839 if (!PetscBTLookup(bitmask,i)) { 840 idx_R_local[n_R] = i; 841 n_R++; 842 } 843 } 844 845 /* Block code */ 846 vbs = 1; 847 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 848 if (bs>1 && !(n_vertices%bs)) { 849 PetscBool is_blocked = PETSC_TRUE; 850 PetscInt *vary; 851 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 852 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 853 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 854 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 855 for (i=0; i<n_vertices; i++) { 856 if (vary[i]!=0 && vary[i]!=bs) { 857 is_blocked = PETSC_FALSE; 858 break; 859 } 860 } 861 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 862 vbs = bs; 863 for (i=0;i<n_R/vbs;i++) { 864 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 865 } 866 } 867 ierr = PetscFree(vary);CHKERRQ(ierr); 868 } 869 ierr = PetscFree(vertices);CHKERRQ(ierr); 870 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 871 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 872 873 /* print some info if requested */ 874 if (pcbddc->dbg_flag) { 875 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 876 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 878 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 880 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 881 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 882 } 883 884 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 885 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 886 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 887 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 888 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 889 for (i=0; i<n_D; i++) { 890 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 891 } 892 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 893 for (i=0, j=0; i<n_R; i++) { 894 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 895 aux_array1[j++] = i; 896 } 897 } 898 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 899 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 900 for (i=0, j=0; i<n_B; i++) { 901 if (!PetscBTLookup(bitmask,is_indices[i])) { 902 aux_array2[j++] = i; 903 } 904 } 905 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 906 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 907 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 908 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 909 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 910 911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 912 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 913 for (i=0, j=0; i<n_R; i++) { 914 if (PetscBTLookup(bitmask,idx_R_local[i])) { 915 aux_array1[j++] = i; 916 } 917 } 918 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 919 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 920 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 921 } 922 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 923 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } 926 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 930 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 931 { 932 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 933 PC_IS *pcis = (PC_IS*)pc->data; 934 PC pc_temp; 935 Mat A_RR; 936 Vec vec1,vec2,vec3; 937 MatStructure matstruct; 938 PetscScalar m_one = -1.0; 939 PetscReal value; 940 PetscInt n_D,n_R,ibs,mbs; 941 PetscBool use_exact,use_exact_reduced,issbaij; 942 PetscErrorCode ierr; 943 /* prefixes stuff */ 944 char dir_prefix[256],neu_prefix[256],str_level[3]; 945 size_t len; 946 947 PetscFunctionBegin; 948 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 949 950 /* compute prefixes */ 951 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 952 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 953 if (!pcbddc->current_level) { 954 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 955 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 956 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 957 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 958 } else { 959 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 960 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 961 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 962 len -= 15; /* remove "pc_bddc_coarse_" */ 963 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 964 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 965 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 966 *(dir_prefix+len)='\0'; 967 *(neu_prefix+len)='\0'; 968 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 969 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 970 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 971 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 972 } 973 974 /* DIRICHLET PROBLEM */ 975 /* Matrix for Dirichlet problem is pcis->A_II */ 976 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 977 if (!pcbddc->ksp_D) { /* create object if not yet build */ 978 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 979 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 980 /* default */ 981 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 982 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 983 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 984 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 985 if (issbaij) { 986 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 987 } else { 988 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 989 } 990 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 991 } 992 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 993 /* Allow user's customization */ 994 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 995 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 996 if (!n_D) { 997 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 998 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 999 } 1000 /* Set Up KSP for Dirichlet problem of BDDC */ 1001 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1002 /* set ksp_D into pcis data */ 1003 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1004 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1005 pcis->ksp_D = pcbddc->ksp_D; 1006 1007 /* NEUMANN PROBLEM */ 1008 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1009 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1010 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1011 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1012 if (ibs != mbs) { 1013 Mat newmat; 1014 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1015 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1016 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1017 } else { 1018 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1019 } 1020 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1021 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1022 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1023 /* default */ 1024 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1025 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1026 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1027 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1028 if (issbaij) { 1029 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1030 } else { 1031 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1032 } 1033 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1034 } 1035 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1036 /* Allow user's customization */ 1037 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1038 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1039 if (!n_R) { 1040 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1041 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1042 } 1043 /* Set Up KSP for Neumann problem of BDDC */ 1044 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1045 1046 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1047 1048 /* Dirichlet */ 1049 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1050 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1051 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1052 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1053 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1054 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1055 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1056 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1057 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1058 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1059 /* need to be adapted? */ 1060 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1061 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1062 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1063 /* print info */ 1064 if (pcbddc->dbg_flag) { 1065 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1070 } 1071 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1072 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1073 } 1074 1075 /* Neumann */ 1076 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1077 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1078 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1079 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1080 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1081 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1082 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1083 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1084 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1085 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1086 /* need to be adapted? */ 1087 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1088 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1089 /* print info */ 1090 if (pcbddc->dbg_flag) { 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1093 } 1094 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1095 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1096 } 1097 /* free Neumann problem's matrix */ 1098 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1104 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc) 1105 { 1106 PetscErrorCode ierr; 1107 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1108 1109 PetscFunctionBegin; 1110 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1111 if (pcbddc->local_auxmat1) { 1112 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1113 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1121 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1122 { 1123 PetscErrorCode ierr; 1124 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1125 PC_IS* pcis = (PC_IS*) (pc->data); 1126 const PetscScalar zero = 0.0; 1127 1128 PetscFunctionBegin; 1129 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1130 if (applytranspose) { 1131 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1132 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1133 } else { 1134 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1135 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1136 } 1137 /* Scatter data of coarse_rhs */ 1138 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1139 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 1141 /* Local solution on R nodes */ 1142 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1143 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1144 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1145 if (pcbddc->switch_static) { 1146 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1147 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 } 1149 ierr = PCBDDCSolveSubstructureCorrection(pc);CHKERRQ(ierr); 1150 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1151 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1152 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1153 if (pcbddc->switch_static) { 1154 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1155 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1156 } 1157 1158 /* Coarse solution */ 1159 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1160 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1161 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1162 } 1163 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1164 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 1166 /* Sum contributions from two levels */ 1167 if (applytranspose) { 1168 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1169 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1170 } else { 1171 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1172 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1173 } 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1179 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1180 { 1181 PetscErrorCode ierr; 1182 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1183 1184 PetscFunctionBegin; 1185 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1191 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1192 { 1193 PetscErrorCode ierr; 1194 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1195 1196 PetscFunctionBegin; 1197 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1198 PetscFunctionReturn(0); 1199 } 1200 1201 /* uncomment for testing purposes */ 1202 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1203 #undef __FUNCT__ 1204 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1205 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1206 { 1207 PetscErrorCode ierr; 1208 PC_IS* pcis = (PC_IS*)(pc->data); 1209 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1210 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1211 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1212 MatType impMatType=MATSEQAIJ; 1213 /* one and zero */ 1214 PetscScalar one=1.0,zero=0.0; 1215 /* space to store constraints and their local indices */ 1216 PetscScalar *temp_quadrature_constraint; 1217 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1218 /* iterators */ 1219 PetscInt i,j,k,total_counts,temp_start_ptr; 1220 /* stuff to store connected components stored in pcbddc->mat_graph */ 1221 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1222 PetscInt n_ISForFaces,n_ISForEdges; 1223 /* near null space stuff */ 1224 MatNullSpace nearnullsp; 1225 const Vec *nearnullvecs; 1226 Vec *localnearnullsp; 1227 PetscBool nnsp_has_cnst; 1228 PetscInt nnsp_size; 1229 PetscScalar *array; 1230 /* BLAS integers */ 1231 PetscBLASInt lwork,lierr; 1232 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1233 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1234 /* LAPACK working arrays for SVD or POD */ 1235 PetscBool skip_lapack; 1236 PetscScalar *work; 1237 PetscReal *singular_vals; 1238 #if defined(PETSC_USE_COMPLEX) 1239 PetscReal *rwork; 1240 #endif 1241 #if defined(PETSC_MISSING_LAPACK_GESVD) 1242 PetscBLASInt Blas_one_2=1; 1243 PetscScalar *temp_basis,*correlation_mat; 1244 #else 1245 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1246 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1247 #endif 1248 /* change of basis */ 1249 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1250 PetscBool boolforchange,qr_needed; 1251 PetscBT touched,change_basis,qr_needed_idx; 1252 /* auxiliary stuff */ 1253 PetscInt *nnz,*is_indices,*local_to_B; 1254 /* some quantities */ 1255 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1256 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1257 1258 1259 PetscFunctionBegin; 1260 /* Get index sets for faces, edges and vertices from graph */ 1261 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1262 pcbddc->use_vertices = PETSC_TRUE; 1263 } 1264 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1265 /* HACK: provide functions to set change of basis */ 1266 if (!ISForVertices && pcbddc->NullSpace) { 1267 pcbddc->use_change_of_basis = PETSC_TRUE; 1268 pcbddc->use_change_on_faces = PETSC_FALSE; 1269 } 1270 /* print some info */ 1271 if (pcbddc->dbg_flag) { 1272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1273 i = 0; 1274 if (ISForVertices) { 1275 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1276 } 1277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1279 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1280 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1281 } 1282 /* check if near null space is attached to global mat */ 1283 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1284 if (nearnullsp) { 1285 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1286 } else { /* if near null space is not provided BDDC uses constants by default */ 1287 nnsp_size = 0; 1288 nnsp_has_cnst = PETSC_TRUE; 1289 } 1290 /* get max number of constraints on a single cc */ 1291 max_constraints = nnsp_size; 1292 if (nnsp_has_cnst) max_constraints++; 1293 1294 /* 1295 Evaluate maximum storage size needed by the procedure 1296 - temp_indices will contain start index of each constraint stored as follows 1297 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1298 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1299 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1300 */ 1301 total_counts = n_ISForFaces+n_ISForEdges; 1302 total_counts *= max_constraints; 1303 n_vertices = 0; 1304 if (ISForVertices) { 1305 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1306 } 1307 total_counts += n_vertices; 1308 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1309 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1310 total_counts = 0; 1311 max_size_of_constraint = 0; 1312 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1313 if (i<n_ISForEdges) { 1314 used_IS = &ISForEdges[i]; 1315 } else { 1316 used_IS = &ISForFaces[i-n_ISForEdges]; 1317 } 1318 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1319 total_counts += j; 1320 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1321 } 1322 total_counts *= max_constraints; 1323 total_counts += n_vertices; 1324 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1325 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1326 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1327 /* local to boundary numbering */ 1328 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1329 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1330 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1331 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1332 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1333 /* get local part of global near null space vectors */ 1334 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1335 for (k=0;k<nnsp_size;k++) { 1336 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1337 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1338 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1339 } 1340 1341 /* whether or not to skip lapack calls */ 1342 skip_lapack = PETSC_TRUE; 1343 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1344 1345 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1346 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1347 PetscScalar temp_work; 1348 #if defined(PETSC_MISSING_LAPACK_GESVD) 1349 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1350 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1351 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1352 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1353 #if defined(PETSC_USE_COMPLEX) 1354 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1355 #endif 1356 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1357 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1358 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1359 lwork = -1; 1360 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1361 #if !defined(PETSC_USE_COMPLEX) 1362 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1363 #else 1364 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1365 #endif 1366 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1367 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1368 #else /* on missing GESVD */ 1369 /* SVD */ 1370 PetscInt max_n,min_n; 1371 max_n = max_size_of_constraint; 1372 min_n = max_constraints; 1373 if (max_size_of_constraint < max_constraints) { 1374 min_n = max_size_of_constraint; 1375 max_n = max_constraints; 1376 } 1377 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1378 #if defined(PETSC_USE_COMPLEX) 1379 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1380 #endif 1381 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1382 lwork = -1; 1383 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1384 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1385 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1386 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1387 #if !defined(PETSC_USE_COMPLEX) 1388 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1389 #else 1390 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1391 #endif 1392 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1393 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1394 #endif /* on missing GESVD */ 1395 /* Allocate optimal workspace */ 1396 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1397 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1398 } 1399 /* Now we can loop on constraining sets */ 1400 total_counts = 0; 1401 temp_indices[0] = 0; 1402 /* vertices */ 1403 if (ISForVertices) { 1404 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1405 if (nnsp_has_cnst) { /* consider all vertices */ 1406 for (i=0;i<n_vertices;i++) { 1407 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1408 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1409 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1410 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1411 total_counts++; 1412 } 1413 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1414 PetscBool used_vertex; 1415 for (i=0;i<n_vertices;i++) { 1416 used_vertex = PETSC_FALSE; 1417 k = 0; 1418 while (!used_vertex && k<nnsp_size) { 1419 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1420 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1421 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1422 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1423 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1424 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1425 total_counts++; 1426 used_vertex = PETSC_TRUE; 1427 } 1428 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1429 k++; 1430 } 1431 } 1432 } 1433 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1434 n_vertices = total_counts; 1435 } 1436 1437 /* edges and faces */ 1438 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1439 if (i<n_ISForEdges) { 1440 used_IS = &ISForEdges[i]; 1441 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1442 } else { 1443 used_IS = &ISForFaces[i-n_ISForEdges]; 1444 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1445 } 1446 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1447 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1448 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1449 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1450 /* change of basis should not be performed on local periodic nodes */ 1451 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1452 if (nnsp_has_cnst) { 1453 PetscScalar quad_value; 1454 temp_constraints++; 1455 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1456 for (j=0;j<size_of_constraint;j++) { 1457 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1458 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1459 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1460 } 1461 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1462 total_counts++; 1463 } 1464 for (k=0;k<nnsp_size;k++) { 1465 PetscReal real_value; 1466 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1467 for (j=0;j<size_of_constraint;j++) { 1468 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1469 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1470 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1471 } 1472 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1473 /* check if array is null on the connected component */ 1474 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1475 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1476 if (real_value > 0.0) { /* keep indices and values */ 1477 temp_constraints++; 1478 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1479 total_counts++; 1480 } 1481 } 1482 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1483 valid_constraints = temp_constraints; 1484 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1485 if (!pcbddc->use_nnsp_true && temp_constraints) { 1486 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1487 1488 #if defined(PETSC_MISSING_LAPACK_GESVD) 1489 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1490 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1491 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1492 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1493 from that computed using LAPACKgesvd 1494 -> This is due to a different computation of eigenvectors in LAPACKheev 1495 -> The quality of the POD-computed basis will be the same */ 1496 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1497 /* Store upper triangular part of correlation matrix */ 1498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1499 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1500 for (j=0;j<temp_constraints;j++) { 1501 for (k=0;k<j+1;k++) { 1502 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1503 } 1504 } 1505 /* compute eigenvalues and eigenvectors of correlation matrix */ 1506 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1507 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1508 #if !defined(PETSC_USE_COMPLEX) 1509 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1510 #else 1511 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1512 #endif 1513 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1514 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1515 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1516 j=0; 1517 while (j < temp_constraints && singular_vals[j] < tol) j++; 1518 total_counts=total_counts-j; 1519 valid_constraints = temp_constraints-j; 1520 /* scale and copy POD basis into used quadrature memory */ 1521 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1522 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1523 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1524 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1525 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1526 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1527 if (j<temp_constraints) { 1528 PetscInt ii; 1529 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1530 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1531 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1532 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1533 for (k=0;k<temp_constraints-j;k++) { 1534 for (ii=0;ii<size_of_constraint;ii++) { 1535 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1536 } 1537 } 1538 } 1539 #else /* on missing GESVD */ 1540 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1541 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1542 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1543 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1544 #if !defined(PETSC_USE_COMPLEX) 1545 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1546 #else 1547 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1548 #endif 1549 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1550 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1551 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1552 k = temp_constraints; 1553 if (k > size_of_constraint) k = size_of_constraint; 1554 j = 0; 1555 while (j < k && singular_vals[k-j-1] < tol) j++; 1556 total_counts = total_counts-temp_constraints+k-j; 1557 valid_constraints = k-j; 1558 #endif /* on missing GESVD */ 1559 } 1560 /* setting change_of_basis flag is safe now */ 1561 if (boolforchange) { 1562 for (j=0;j<valid_constraints;j++) { 1563 PetscBTSet(change_basis,total_counts-j-1); 1564 } 1565 } 1566 } 1567 /* free index sets of faces, edges and vertices */ 1568 for (i=0;i<n_ISForFaces;i++) { 1569 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1570 } 1571 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1572 for (i=0;i<n_ISForEdges;i++) { 1573 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1574 } 1575 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1576 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1577 1578 /* free workspace */ 1579 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1580 ierr = PetscFree(work);CHKERRQ(ierr); 1581 #if defined(PETSC_USE_COMPLEX) 1582 ierr = PetscFree(rwork);CHKERRQ(ierr); 1583 #endif 1584 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1585 #if defined(PETSC_MISSING_LAPACK_GESVD) 1586 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1587 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1588 #endif 1589 } 1590 for (k=0;k<nnsp_size;k++) { 1591 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1592 } 1593 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1594 1595 /* set quantities in pcbddc data structure */ 1596 /* n_vertices defines the number of subdomain corners in the primal space */ 1597 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1598 pcbddc->local_primal_size = total_counts; 1599 pcbddc->n_vertices = n_vertices; 1600 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1601 1602 /* Create constraint matrix */ 1603 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1604 /* so we need to set it up properly either with or without change of basis */ 1605 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1606 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1607 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1608 /* array to compute a local numbering of constraints : vertices first then constraints */ 1609 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1610 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1611 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1612 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1613 /* auxiliary stuff for basis change */ 1614 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1615 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1616 1617 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1618 total_primal_vertices=0; 1619 for (i=0;i<pcbddc->local_primal_size;i++) { 1620 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1621 if (size_of_constraint == 1) { 1622 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1623 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1624 aux_primal_minloc[total_primal_vertices]=0; 1625 total_primal_vertices++; 1626 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1627 PetscInt min_loc,min_index; 1628 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1629 /* find first untouched local node */ 1630 k = 0; 1631 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1632 min_index = global_indices[k]; 1633 min_loc = k; 1634 /* search the minimum among global nodes already untouched on the cc */ 1635 for (k=1;k<size_of_constraint;k++) { 1636 /* there can be more than one constraint on a single connected component */ 1637 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1638 min_index = global_indices[k]; 1639 min_loc = k; 1640 } 1641 } 1642 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1643 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1644 aux_primal_minloc[total_primal_vertices]=min_loc; 1645 total_primal_vertices++; 1646 } 1647 } 1648 /* determine if a QR strategy is needed for change of basis */ 1649 qr_needed = PETSC_FALSE; 1650 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1651 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1652 if (PetscBTLookup(change_basis,i)) { 1653 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1654 j = 0; 1655 for (k=0;k<size_of_constraint;k++) { 1656 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1657 j++; 1658 } 1659 } 1660 /* found more than one primal dof on the cc */ 1661 if (j > 1) { 1662 PetscBTSet(qr_needed_idx,i); 1663 qr_needed = PETSC_TRUE; 1664 } 1665 } 1666 } 1667 /* free workspace */ 1668 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1669 1670 /* permute indices in order to have a sorted set of vertices */ 1671 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1672 1673 /* nonzero structure of constraint matrix */ 1674 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1675 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1676 j=total_primal_vertices; 1677 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1678 if (!PetscBTLookup(change_basis,i)) { 1679 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1680 j++; 1681 } 1682 } 1683 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1684 ierr = PetscFree(nnz);CHKERRQ(ierr); 1685 /* set values in constraint matrix */ 1686 for (i=0;i<total_primal_vertices;i++) { 1687 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1688 } 1689 total_counts = total_primal_vertices; 1690 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1691 if (!PetscBTLookup(change_basis,i)) { 1692 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1693 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1694 total_counts++; 1695 } 1696 } 1697 /* assembling */ 1698 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 /* 1701 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1702 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1703 */ 1704 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1705 if (pcbddc->use_change_of_basis) { 1706 /* dual and primal dofs on a single cc */ 1707 PetscInt dual_dofs,primal_dofs; 1708 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1709 PetscInt primal_counter; 1710 /* working stuff for GEQRF */ 1711 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1712 PetscBLASInt lqr_work; 1713 /* working stuff for UNGQR */ 1714 PetscScalar *gqr_work,lgqr_work_t; 1715 PetscBLASInt lgqr_work; 1716 /* working stuff for TRTRS */ 1717 PetscScalar *trs_rhs; 1718 PetscBLASInt Blas_NRHS; 1719 /* pointers for values insertion into change of basis matrix */ 1720 PetscInt *start_rows,*start_cols; 1721 PetscScalar *start_vals; 1722 /* working stuff for values insertion */ 1723 PetscBT is_primal; 1724 1725 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1726 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1727 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1728 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1729 /* work arrays */ 1730 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1731 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1732 /* nonzeros per row */ 1733 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1734 if (PetscBTLookup(change_basis,i)) { 1735 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1736 if (PetscBTLookup(qr_needed_idx,i)) { 1737 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1738 } else { 1739 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1740 /* get local primal index on the cc */ 1741 j = 0; 1742 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1743 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1744 } 1745 } 1746 } 1747 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1748 ierr = PetscFree(nnz);CHKERRQ(ierr); 1749 /* Set initial identity in the matrix */ 1750 for (i=0;i<pcis->n_B;i++) { 1751 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1752 } 1753 1754 if (pcbddc->dbg_flag) { 1755 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1756 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1757 } 1758 1759 1760 /* Now we loop on the constraints which need a change of basis */ 1761 /* 1762 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1763 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1764 1765 Basic blocks of change of basis matrix T computed 1766 1767 - Using the following block transformation if there is only a primal dof on the cc 1768 (in the example, primal dof is the last one of the edge in LOCAL ordering 1769 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1770 | 1 0 ... 0 1 | 1771 | 0 1 ... 0 1 | 1772 | ... | 1773 | 0 ... 1 1 | 1774 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1775 1776 - via QR decomposition of constraints otherwise 1777 */ 1778 if (qr_needed) { 1779 /* space to store Q */ 1780 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1781 /* first we issue queries for optimal work */ 1782 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1783 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1784 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1785 lqr_work = -1; 1786 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1787 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1788 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1789 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1790 lgqr_work = -1; 1791 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1792 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1793 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1794 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1795 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1796 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1797 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1798 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1799 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1800 /* array to store scaling factors for reflectors */ 1801 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1802 /* array to store rhs and solution of triangular solver */ 1803 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1804 /* allocating workspace for check */ 1805 if (pcbddc->dbg_flag) { 1806 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1807 } 1808 } 1809 /* array to store whether a node is primal or not */ 1810 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1811 for (i=0;i<total_primal_vertices;i++) { 1812 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1813 } 1814 1815 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1816 /* -> using implicit ordering contained in temp_indices data */ 1817 total_counts = pcbddc->n_vertices; 1818 primal_counter = total_counts; 1819 while (total_counts<pcbddc->local_primal_size) { 1820 primal_dofs = 1; 1821 if (PetscBTLookup(change_basis,total_counts)) { 1822 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1823 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1824 primal_dofs++; 1825 } 1826 /* get constraint info */ 1827 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1828 dual_dofs = size_of_constraint-primal_dofs; 1829 1830 if (pcbddc->dbg_flag) { 1831 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1832 } 1833 1834 if (primal_dofs > 1) { /* QR */ 1835 1836 /* copy quadrature constraints for change of basis check */ 1837 if (pcbddc->dbg_flag) { 1838 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1839 } 1840 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1841 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1842 1843 /* compute QR decomposition of constraints */ 1844 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1845 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1846 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1847 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1848 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1849 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1850 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1851 1852 /* explictly compute R^-T */ 1853 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1854 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1855 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1856 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1857 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1858 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1859 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1860 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1861 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1862 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1863 1864 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1865 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1867 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1868 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1869 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1870 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1871 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1872 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1873 1874 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1875 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1876 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1877 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1878 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1879 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1880 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1881 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1882 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1883 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1884 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1885 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1886 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1887 1888 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1889 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1890 /* insert cols for primal dofs */ 1891 for (j=0;j<primal_dofs;j++) { 1892 start_vals = &qr_basis[j*size_of_constraint]; 1893 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1894 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1895 } 1896 /* insert cols for dual dofs */ 1897 for (j=0,k=0;j<dual_dofs;k++) { 1898 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1899 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1900 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1901 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1902 j++; 1903 } 1904 } 1905 1906 /* check change of basis */ 1907 if (pcbddc->dbg_flag) { 1908 PetscInt ii,jj; 1909 PetscBool valid_qr=PETSC_TRUE; 1910 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1911 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1912 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1913 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1914 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1915 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1916 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1917 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1918 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1919 for (jj=0;jj<size_of_constraint;jj++) { 1920 for (ii=0;ii<primal_dofs;ii++) { 1921 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1922 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1923 } 1924 } 1925 if (!valid_qr) { 1926 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1927 for (jj=0;jj<size_of_constraint;jj++) { 1928 for (ii=0;ii<primal_dofs;ii++) { 1929 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1930 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1931 } 1932 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1933 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1934 } 1935 } 1936 } 1937 } else { 1938 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1939 } 1940 } 1941 } else { /* simple transformation block */ 1942 PetscInt row,col; 1943 PetscScalar val; 1944 for (j=0;j<size_of_constraint;j++) { 1945 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1946 if (!PetscBTLookup(is_primal,row)) { 1947 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1948 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1949 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1950 } else { 1951 for (k=0;k<size_of_constraint;k++) { 1952 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1953 if (row != col) { 1954 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1955 } else { 1956 val = 1.0; 1957 } 1958 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1959 } 1960 } 1961 } 1962 } 1963 /* increment primal counter */ 1964 primal_counter += primal_dofs; 1965 } else { 1966 if (pcbddc->dbg_flag) { 1967 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1968 } 1969 } 1970 /* increment constraint counter total_counts */ 1971 total_counts += primal_dofs; 1972 } 1973 1974 /* free workspace */ 1975 if (qr_needed) { 1976 if (pcbddc->dbg_flag) { 1977 ierr = PetscFree(work);CHKERRQ(ierr); 1978 } 1979 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1980 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1981 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1982 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1983 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1984 } 1985 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1986 /* assembling */ 1987 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1989 /* 1990 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1991 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1992 */ 1993 } 1994 1995 /* flush dbg viewer */ 1996 if (pcbddc->dbg_flag) { 1997 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1998 } 1999 2000 /* free workspace */ 2001 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2002 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2003 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2004 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2005 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2006 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2007 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2008 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2009 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2010 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2011 PetscFunctionReturn(0); 2012 } 2013 2014 #undef __FUNCT__ 2015 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2016 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2017 { 2018 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2019 PC_IS *pcis = (PC_IS*)pc->data; 2020 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2021 PetscInt bs,ierr,i,vertex_size; 2022 PetscViewer viewer=pcbddc->dbg_viewer; 2023 2024 PetscFunctionBegin; 2025 /* Init local Graph struct */ 2026 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2027 2028 /* Check validity of the csr graph passed in by the user */ 2029 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2030 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2031 } 2032 2033 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2034 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2035 Mat mat_adj; 2036 const PetscInt *xadj,*adjncy; 2037 PetscBool flg_row=PETSC_TRUE; 2038 2039 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2040 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2041 if (!flg_row) { 2042 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2043 } 2044 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2045 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2046 if (!flg_row) { 2047 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2048 } 2049 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2050 } 2051 2052 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2053 vertex_size = 1; 2054 if (!pcbddc->user_provided_isfordofs) { 2055 if (!pcbddc->n_ISForDofs) { 2056 IS *custom_ISForDofs; 2057 2058 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2059 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2060 for (i=0;i<bs;i++) { 2061 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2062 } 2063 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2064 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2065 /* remove my references to IS objects */ 2066 for (i=0;i<bs;i++) { 2067 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2068 } 2069 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2070 } 2071 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2072 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2073 } 2074 2075 /* Setup of Graph */ 2076 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2077 2078 /* Graph's connected components analysis */ 2079 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2080 2081 /* print some info to stdout */ 2082 if (pcbddc->dbg_flag) { 2083 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2084 } 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2090 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2091 { 2092 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2093 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2094 PetscErrorCode ierr; 2095 2096 PetscFunctionBegin; 2097 n = 0; 2098 vertices = 0; 2099 if (pcbddc->ConstraintMatrix) { 2100 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2101 for (i=0;i<local_primal_size;i++) { 2102 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2103 if (size_of_constraint == 1) n++; 2104 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2105 } 2106 if (vertices_idx) { 2107 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2108 n = 0; 2109 for (i=0;i<local_primal_size;i++) { 2110 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2111 if (size_of_constraint == 1) { 2112 vertices[n++]=row_cmat_indices[0]; 2113 } 2114 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2115 } 2116 } 2117 } 2118 *n_vertices = n; 2119 if (vertices_idx) *vertices_idx = vertices; 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2125 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2126 { 2127 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2128 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2129 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2130 PetscBT touched; 2131 PetscErrorCode ierr; 2132 2133 /* This function assumes that the number of local constraints per connected component 2134 is not greater than the number of nodes defined for the connected component 2135 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2136 PetscFunctionBegin; 2137 n = 0; 2138 constraints_index = 0; 2139 if (pcbddc->ConstraintMatrix) { 2140 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2141 max_size_of_constraint = 0; 2142 for (i=0;i<local_primal_size;i++) { 2143 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2144 if (size_of_constraint > 1) { 2145 n++; 2146 } 2147 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2148 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2149 } 2150 if (constraints_idx) { 2151 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2152 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2153 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2154 n = 0; 2155 for (i=0;i<local_primal_size;i++) { 2156 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2157 if (size_of_constraint > 1) { 2158 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2159 /* find first untouched local node */ 2160 j = 0; 2161 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2162 min_index = row_cmat_global_indices[j]; 2163 min_loc = j; 2164 /* search the minimum among nodes not yet touched on the connected component 2165 since there can be more than one constraint on a single cc */ 2166 for (j=1;j<size_of_constraint;j++) { 2167 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2168 min_index = row_cmat_global_indices[j]; 2169 min_loc = j; 2170 } 2171 } 2172 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2173 constraints_index[n++] = row_cmat_indices[min_loc]; 2174 } 2175 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2176 } 2177 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2178 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2179 } 2180 } 2181 *n_constraints = n; 2182 if (constraints_idx) *constraints_idx = constraints_index; 2183 PetscFunctionReturn(0); 2184 } 2185 2186 /* the next two functions has been adapted from pcis.c */ 2187 #undef __FUNCT__ 2188 #define __FUNCT__ "PCBDDCApplySchur" 2189 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2190 { 2191 PetscErrorCode ierr; 2192 PC_IS *pcis = (PC_IS*)(pc->data); 2193 2194 PetscFunctionBegin; 2195 if (!vec2_B) { vec2_B = v; } 2196 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2197 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2198 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2199 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2200 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2201 PetscFunctionReturn(0); 2202 } 2203 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2206 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2207 { 2208 PetscErrorCode ierr; 2209 PC_IS *pcis = (PC_IS*)(pc->data); 2210 2211 PetscFunctionBegin; 2212 if (!vec2_B) { vec2_B = v; } 2213 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2214 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2215 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2216 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2217 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "PCBDDCSubsetNumbering" 2223 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2224 { 2225 Vec local_vec,global_vec; 2226 IS seqis,paris; 2227 VecScatter scatter_ctx; 2228 PetscScalar *array; 2229 PetscInt *temp_global_dofs; 2230 PetscScalar globalsum; 2231 PetscInt i,j,s; 2232 PetscInt nlocals,first_index,old_index,max_local; 2233 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2234 PetscMPIInt *dof_sizes,*dof_displs; 2235 PetscBool first_found; 2236 PetscErrorCode ierr; 2237 2238 PetscFunctionBegin; 2239 /* mpi buffers */ 2240 MPI_Comm_size(comm,&size_prec_comm); 2241 MPI_Comm_rank(comm,&rank_prec_comm); 2242 j = ( !rank_prec_comm ? size_prec_comm : 0); 2243 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2244 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2245 /* get maximum size of subset */ 2246 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2247 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2248 max_local = 0; 2249 if (n_local_dofs) { 2250 max_local = temp_global_dofs[0]; 2251 for (i=1;i<n_local_dofs;i++) { 2252 if (max_local < temp_global_dofs[i] ) { 2253 max_local = temp_global_dofs[i]; 2254 } 2255 } 2256 } 2257 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2258 max_global++; 2259 max_local = 0; 2260 if (n_local_dofs) { 2261 max_local = local_dofs[0]; 2262 for (i=1;i<n_local_dofs;i++) { 2263 if (max_local < local_dofs[i] ) { 2264 max_local = local_dofs[i]; 2265 } 2266 } 2267 } 2268 max_local++; 2269 /* allocate workspace */ 2270 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2271 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2272 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2273 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2274 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2275 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2276 /* create scatter */ 2277 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2278 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2279 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2280 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2281 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2282 /* init array */ 2283 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2284 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2285 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2286 if (local_dofs_mult) { 2287 for (i=0;i<n_local_dofs;i++) { 2288 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2289 } 2290 } else { 2291 for (i=0;i<n_local_dofs;i++) { 2292 array[local_dofs[i]]=1.0; 2293 } 2294 } 2295 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2296 /* scatter into global vec and get total number of global dofs */ 2297 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2298 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2299 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2300 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2301 /* Fill global_vec with cumulative function for global numbering */ 2302 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2303 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2304 nlocals = 0; 2305 first_index = -1; 2306 first_found = PETSC_FALSE; 2307 for (i=0;i<s;i++) { 2308 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2309 first_found = PETSC_TRUE; 2310 first_index = i; 2311 } 2312 nlocals += (PetscInt)PetscRealPart(array[i]); 2313 } 2314 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2315 if (!rank_prec_comm) { 2316 dof_displs[0]=0; 2317 for (i=1;i<size_prec_comm;i++) { 2318 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2319 } 2320 } 2321 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2322 if (first_found) { 2323 array[first_index] += (PetscScalar)nlocals; 2324 old_index = first_index; 2325 for (i=first_index+1;i<s;i++) { 2326 if (PetscRealPart(array[i]) > 0.0) { 2327 array[i] += array[old_index]; 2328 old_index = i; 2329 } 2330 } 2331 } 2332 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2333 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2334 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2335 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2336 /* get global ordering of local dofs */ 2337 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2338 if (local_dofs_mult) { 2339 for (i=0;i<n_local_dofs;i++) { 2340 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2341 } 2342 } else { 2343 for (i=0;i<n_local_dofs;i++) { 2344 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2345 } 2346 } 2347 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2348 /* free workspace */ 2349 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2350 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2351 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2352 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2353 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2354 /* return pointer to global ordering of local dofs */ 2355 *global_numbering_subset = temp_global_dofs; 2356 PetscFunctionReturn(0); 2357 } 2358 2359 #undef __FUNCT__ 2360 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2361 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2362 { 2363 PetscInt i,j; 2364 PetscScalar *alphas; 2365 PetscErrorCode ierr; 2366 2367 PetscFunctionBegin; 2368 /* this implements stabilized Gram-Schmidt */ 2369 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2370 for (i=0;i<n;i++) { 2371 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2372 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2373 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2374 } 2375 ierr = PetscFree(alphas);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2380 #undef __FUNCT__ 2381 #define __FUNCT__ "MatConvert_IS_AIJ" 2382 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2383 { 2384 Mat new_mat; 2385 Mat_IS *matis = (Mat_IS*)(mat->data); 2386 /* info on mat */ 2387 PetscInt bs,rows,cols; 2388 PetscInt lrows,lcols; 2389 PetscInt local_rows,local_cols; 2390 PetscMPIInt nsubdomains; 2391 /* preallocation */ 2392 Vec vec_dnz,vec_onz; 2393 PetscScalar *my_dnz,*my_onz,*array; 2394 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2395 PetscInt index_row,index_col,owner; 2396 PetscInt *local_indices,*global_indices; 2397 /* work */ 2398 PetscInt i,j; 2399 PetscErrorCode ierr; 2400 2401 PetscFunctionBegin; 2402 /* CHECKS */ 2403 /* get info from mat */ 2404 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2405 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2406 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2407 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2408 2409 /* MAT_INITIAL_MATRIX */ 2410 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2411 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2412 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2413 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2414 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2415 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2416 2417 /* 2418 preallocation 2419 */ 2420 2421 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2422 /* 2423 Some vectors are needed to sum up properly on shared interface dofs. 2424 Note that preallocation is not exact, since it overestimates nonzeros 2425 */ 2426 /* 2427 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2428 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2429 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2430 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2431 */ 2432 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2433 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2434 /* All processes need to compute entire row ownership */ 2435 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2436 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2437 for (i=0;i<nsubdomains;i++) { 2438 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2439 row_ownership[j]=i; 2440 } 2441 } 2442 /* map indices of local mat to global values */ 2443 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2444 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2445 for (i=0;i<local_rows;i++) local_indices[i]=i; 2446 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2447 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2448 2449 /* 2450 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2451 then, they will be summed up properly. This way, preallocation is always sufficient 2452 */ 2453 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2454 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2455 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2456 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2457 for (i=0;i<local_rows;i++) { 2458 index_row = global_indices[i]; 2459 for (j=i;j<local_rows;j++) { 2460 owner = row_ownership[index_row]; 2461 index_col = global_indices[j]; 2462 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2463 my_dnz[i] += 1.0; 2464 } else { /* offdiag block */ 2465 my_onz[i] += 1.0; 2466 } 2467 /* same as before, interchanging rows and cols */ 2468 if (i != j) { 2469 owner = row_ownership[index_col]; 2470 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2471 my_dnz[j] += 1.0; 2472 } else { 2473 my_onz[j] += 1.0; 2474 } 2475 } 2476 } 2477 } 2478 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2479 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2480 if (local_rows) { /* multilevel guard */ 2481 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2482 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2483 } 2484 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2485 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2486 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2487 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2488 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2489 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2490 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2491 2492 /* set computed preallocation in dnz and onz */ 2493 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2494 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2495 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2496 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2497 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2498 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2499 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2500 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2501 2502 /* Resize preallocation if overestimated */ 2503 for (i=0;i<lrows;i++) { 2504 dnz[i] = PetscMin(dnz[i],lcols); 2505 onz[i] = PetscMin(onz[i],cols-lcols); 2506 } 2507 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2508 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2509 2510 /* 2511 Set values. Very Basic. 2512 */ 2513 for (i=0;i<local_rows;i++) { 2514 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2515 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2516 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2517 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2518 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2519 } 2520 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2521 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2522 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2523 2524 /* get back mat */ 2525 *M = new_mat; 2526 PetscFunctionReturn(0); 2527 } 2528 2529 #undef __FUNCT__ 2530 #define __FUNCT__ "MatISSubassemble_Private" 2531 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2532 { 2533 Mat subdomain_adj; 2534 IS new_ranks,ranks_send_to; 2535 MatPartitioning partitioner; 2536 Mat_IS *matis; 2537 PetscInt n_neighs,*neighs,*n_shared,**shared; 2538 PetscInt prank; 2539 PetscMPIInt size,rank,color; 2540 PetscInt *xadj,*adjncy,*oldranks; 2541 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2542 PetscInt i,j,n_subdomains,local_size,threshold=0; 2543 PetscErrorCode ierr; 2544 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2545 PetscSubcomm subcomm; 2546 2547 PetscFunctionBegin; 2548 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2549 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2550 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2551 2552 /* Get info on mapping */ 2553 matis = (Mat_IS*)(mat->data); 2554 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2555 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2556 2557 /* build local CSR graph of subdomains' connectivity */ 2558 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2559 xadj[0] = 0; 2560 xadj[1] = PetscMax(n_neighs-1,0); 2561 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2562 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2563 2564 if (threshold) { 2565 PetscInt* count,min_threshold; 2566 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2567 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2568 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2569 for (j=0;j<n_shared[i];j++) { 2570 count[shared[i][j]] += 1; 2571 } 2572 } 2573 /* adapt threshold since we dont want to lose significant connections */ 2574 min_threshold = n_neighs; 2575 for (i=1;i<n_neighs;i++) { 2576 for (j=0;j<n_shared[i];j++) { 2577 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2578 } 2579 } 2580 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2581 threshold = PetscMax(min_threshold+1,threshold); 2582 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2583 xadj[1] = 0; 2584 for (i=1;i<n_neighs;i++) { 2585 for (j=0;j<n_shared[i];j++) { 2586 if (count[shared[i][j]] < threshold) { 2587 adjncy[xadj[1]] = neighs[i]; 2588 adjncy_wgt[xadj[1]] = n_shared[i]; 2589 xadj[1]++; 2590 break; 2591 } 2592 } 2593 } 2594 ierr = PetscFree(count);CHKERRQ(ierr); 2595 } else { 2596 if (xadj[1]) { 2597 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2598 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2599 } 2600 } 2601 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2602 if (use_square) { 2603 for (i=0;i<xadj[1];i++) { 2604 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2605 } 2606 } 2607 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2608 2609 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2610 2611 /* 2612 Restrict work on active processes only. 2613 */ 2614 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2615 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2616 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2617 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2618 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2619 if (color) { 2620 ierr = PetscFree(xadj);CHKERRQ(ierr); 2621 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2622 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2623 } else { 2624 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2625 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2626 prank = rank; 2627 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2628 for (i=0;i<size;i++) { 2629 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2630 } 2631 for (i=0;i<xadj[1];i++) { 2632 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2633 } 2634 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2635 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2636 n_subdomains = (PetscInt)size; 2637 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2638 2639 /* Partition */ 2640 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2641 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2642 if (use_vwgt) { 2643 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2644 v_wgt[0] = local_size; 2645 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2646 } 2647 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2648 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2649 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2650 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2651 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2652 2653 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2654 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2655 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2656 /* clean up */ 2657 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2658 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2659 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2660 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2661 } 2662 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2663 2664 /* assemble parallel IS for sends */ 2665 i = 1; 2666 if (color) i=0; 2667 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2668 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2669 2670 /* get back IS */ 2671 *is_sends = ranks_send_to; 2672 PetscFunctionReturn(0); 2673 } 2674 2675 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2676 2677 #undef __FUNCT__ 2678 #define __FUNCT__ "MatISSubassemble" 2679 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2680 { 2681 Mat local_mat,new_mat; 2682 Mat_IS *matis; 2683 IS is_sends_internal; 2684 PetscInt rows,cols; 2685 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2686 PetscBool ismatis,isdense; 2687 ISLocalToGlobalMapping l2gmap; 2688 PetscInt* l2gmap_indices; 2689 const PetscInt* is_indices; 2690 MatType new_local_type; 2691 MatTypePrivate new_local_type_private; 2692 /* buffers */ 2693 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2694 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2695 /* MPI */ 2696 MPI_Comm comm; 2697 PetscMPIInt n_sends,n_recvs,commsize; 2698 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2699 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2700 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2701 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2702 PetscErrorCode ierr; 2703 2704 PetscFunctionBegin; 2705 /* checks */ 2706 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2707 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2708 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2709 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2710 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2711 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2712 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2713 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2714 PetscValidLogicalCollectiveInt(mat,bs,0); 2715 /* prepare IS for sending if not provided */ 2716 if (!is_sends) { 2717 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2718 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2719 } else { 2720 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2721 is_sends_internal = is_sends; 2722 } 2723 2724 /* get pointer of MATIS data */ 2725 matis = (Mat_IS*)mat->data; 2726 2727 /* get comm */ 2728 comm = PetscObjectComm((PetscObject)mat); 2729 2730 /* compute number of sends */ 2731 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2732 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2733 2734 /* compute number of receives */ 2735 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2736 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2737 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2738 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2739 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2740 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2741 ierr = PetscFree(iflags);CHKERRQ(ierr); 2742 2743 /* prepare send/receive buffers */ 2744 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2745 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2746 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2747 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2748 2749 /* Get data from local mat */ 2750 if (!isdense) { 2751 /* TODO: See below some guidelines on how to prepare the local buffers */ 2752 /* 2753 send_buffer_vals should contain the raw values of the local matrix 2754 send_buffer_idxs should contain: 2755 - MatType_PRIVATE type 2756 - PetscInt size_of_l2gmap 2757 - PetscInt global_row_indices[size_of_l2gmap] 2758 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2759 */ 2760 } else { 2761 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2762 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2763 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2764 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2765 send_buffer_idxs[1] = i; 2766 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2767 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2768 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2769 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2770 for (i=0;i<n_sends;i++) { 2771 ilengths_vals[is_indices[i]] = len*len; 2772 ilengths_idxs[is_indices[i]] = len+2; 2773 } 2774 } 2775 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2776 buf_size_idxs = 0; 2777 buf_size_vals = 0; 2778 for (i=0;i<n_recvs;i++) { 2779 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2780 buf_size_vals += (PetscInt)olengths_vals[i]; 2781 } 2782 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2783 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2784 2785 /* get new tags for clean communications */ 2786 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2787 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2788 2789 /* allocate for requests */ 2790 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2791 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2792 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2793 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2794 2795 /* communications */ 2796 ptr_idxs = recv_buffer_idxs; 2797 ptr_vals = recv_buffer_vals; 2798 for (i=0;i<n_recvs;i++) { 2799 source_dest = onodes[i]; 2800 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2801 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2802 ptr_idxs += olengths_idxs[i]; 2803 ptr_vals += olengths_vals[i]; 2804 } 2805 for (i=0;i<n_sends;i++) { 2806 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2807 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2808 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2809 } 2810 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2811 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2812 2813 /* assemble new l2g map */ 2814 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2815 ptr_idxs = recv_buffer_idxs; 2816 buf_size_idxs = 0; 2817 for (i=0;i<n_recvs;i++) { 2818 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2819 ptr_idxs += olengths_idxs[i]; 2820 } 2821 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2822 ptr_idxs = recv_buffer_idxs; 2823 buf_size_idxs = 0; 2824 for (i=0;i<n_recvs;i++) { 2825 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2826 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2827 ptr_idxs += olengths_idxs[i]; 2828 } 2829 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2830 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2831 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2832 2833 /* infer new local matrix type from received local matrices type */ 2834 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2835 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2836 new_local_type_private = MATAIJ_PRIVATE; 2837 new_local_type = MATSEQAIJ; 2838 if (n_recvs) { 2839 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2840 ptr_idxs = recv_buffer_idxs; 2841 for (i=0;i<n_recvs;i++) { 2842 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2843 new_local_type_private = MATAIJ_PRIVATE; 2844 break; 2845 } 2846 ptr_idxs += olengths_idxs[i]; 2847 } 2848 switch (new_local_type_private) { 2849 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2850 new_local_type = MATSEQAIJ; 2851 bs = 1; 2852 break; 2853 case MATAIJ_PRIVATE: 2854 new_local_type = MATSEQAIJ; 2855 bs = 1; 2856 break; 2857 case MATBAIJ_PRIVATE: 2858 new_local_type = MATSEQBAIJ; 2859 break; 2860 case MATSBAIJ_PRIVATE: 2861 new_local_type = MATSEQSBAIJ; 2862 break; 2863 default: 2864 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2865 break; 2866 } 2867 } 2868 2869 /* create MATIS object */ 2870 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2871 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2872 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2873 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2874 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2875 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2876 2877 /* set values */ 2878 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2879 ptr_vals = recv_buffer_vals; 2880 ptr_idxs = recv_buffer_idxs; 2881 for (i=0;i<n_recvs;i++) { 2882 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2883 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2884 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2885 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2886 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2887 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2888 } 2889 ptr_idxs += olengths_idxs[i]; 2890 ptr_vals += olengths_vals[i]; 2891 } 2892 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2893 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2894 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2895 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2896 2897 { /* check */ 2898 Vec lvec,rvec; 2899 PetscReal infty_error; 2900 2901 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2902 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2903 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2904 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2905 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2906 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2907 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2908 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2909 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2910 } 2911 2912 /* free workspace */ 2913 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2914 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2915 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2916 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2917 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2918 if (isdense) { 2919 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2920 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2921 } else { 2922 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2923 } 2924 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2925 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2926 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2927 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2928 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2929 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2930 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2931 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2932 ierr = PetscFree(onodes);CHKERRQ(ierr); 2933 /* get back new mat */ 2934 *mat_n = new_mat; 2935 PetscFunctionReturn(0); 2936 } 2937 2938 #undef __FUNCT__ 2939 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2940 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2941 { 2942 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2943 PC_IS *pcis = (PC_IS*)pc->data; 2944 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2945 MatNullSpace CoarseNullSpace=NULL; 2946 ISLocalToGlobalMapping coarse_islg; 2947 IS coarse_is; 2948 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2949 PetscInt im_active=-1,active_procs=-1; 2950 PC pc_temp; 2951 PCType coarse_pc_type; 2952 KSPType coarse_ksp_type; 2953 PetscBool multilevel_requested,multilevel_allowed; 2954 PetscBool setsym,issym,isbddc,isnn; 2955 MatStructure matstruct; 2956 PetscErrorCode ierr; 2957 2958 PetscFunctionBegin; 2959 /* Assign global numbering to coarse dofs */ 2960 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2961 2962 /* infer some info from user */ 2963 issym = PETSC_FALSE; 2964 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2965 multilevel_allowed = PETSC_FALSE; 2966 multilevel_requested = PETSC_FALSE; 2967 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2968 if (multilevel_requested) { 2969 /* count "active processes" */ 2970 im_active = !!(pcis->n); 2971 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2972 if (active_procs/pcbddc->coarsening_ratio < 2) { 2973 multilevel_allowed = PETSC_FALSE; 2974 } else { 2975 multilevel_allowed = PETSC_TRUE; 2976 } 2977 } 2978 2979 /* set defaults for coarse KSP and PC */ 2980 if (multilevel_allowed) { 2981 if (issym) { 2982 coarse_ksp_type = KSPRICHARDSON; 2983 } else { 2984 coarse_ksp_type = KSPCHEBYSHEV; 2985 } 2986 coarse_pc_type = PCBDDC; 2987 } else { 2988 coarse_ksp_type = KSPPREONLY; 2989 coarse_pc_type = PCREDUNDANT; 2990 } 2991 2992 /* create the coarse KSP object only once with defaults */ 2993 if (!pcbddc->coarse_ksp) { 2994 char prefix[256],str_level[3]; 2995 size_t len; 2996 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2997 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2998 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2999 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3000 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3001 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3002 /* prefix */ 3003 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3004 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3005 if (!pcbddc->current_level) { 3006 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3007 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3008 } else { 3009 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3010 if (pcbddc->current_level>1) len -= 2; 3011 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3012 *(prefix+len)='\0'; 3013 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3014 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3015 } 3016 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3017 } 3018 /* allow user customization */ 3019 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3020 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3021 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3022 3023 /* get some info after set from options */ 3024 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3025 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3026 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3027 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3028 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3029 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3030 isbddc = PETSC_FALSE; 3031 } 3032 3033 /* propagate BDDC info to the next level */ 3034 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3035 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3036 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3037 3038 /* creates temporary MATIS object for coarse matrix */ 3039 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3040 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3041 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3042 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3043 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3044 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3045 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3046 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3047 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3048 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3049 3050 /* assemble coarse matrix */ 3051 if (isbddc || isnn) { 3052 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3053 } else { 3054 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3055 } 3056 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3057 3058 /* create local to global scatters for coarse problem */ 3059 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3060 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3061 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3062 3063 /* propagate symmetry info to coarse matrix */ 3064 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3065 3066 /* Compute coarse null space (special handling by BDDC only) */ 3067 if (pcbddc->NullSpace) { 3068 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3069 if (isbddc) { 3070 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3071 } else { 3072 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3073 } 3074 } 3075 3076 /* set operators */ 3077 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3078 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3079 3080 /* additional KSP customization */ 3081 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3082 if (max_it < 5) { 3083 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3084 } 3085 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3086 3087 3088 /* print some info if requested */ 3089 if (pcbddc->dbg_flag) { 3090 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3091 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3092 if (!multilevel_allowed) { 3093 if (multilevel_requested) { 3094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3095 } else if (pcbddc->max_levels) { 3096 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3097 } 3098 } 3099 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3100 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3101 } 3102 3103 /* setup coarse ksp */ 3104 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3105 if (pcbddc->dbg_flag) { 3106 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3107 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3108 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3109 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3110 } 3111 3112 /* Check coarse problem if requested */ 3113 if (pcbddc->dbg_flag) { 3114 KSP check_ksp; 3115 KSPType check_ksp_type; 3116 PC check_pc; 3117 Vec check_vec; 3118 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3119 PetscInt its; 3120 PetscBool ispreonly,compute; 3121 3122 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3123 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3124 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3125 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3126 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3127 if (ispreonly) { 3128 check_ksp_type = KSPPREONLY; 3129 compute = PETSC_FALSE; 3130 } else { 3131 if (issym) check_ksp_type = KSPCG; 3132 else check_ksp_type = KSPGMRES; 3133 compute = PETSC_TRUE; 3134 } 3135 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3136 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3137 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3138 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3139 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3140 /* create random vec */ 3141 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3142 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3143 if (CoarseNullSpace) { 3144 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3145 } 3146 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3147 /* solve coarse problem */ 3148 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3149 if (CoarseNullSpace) { 3150 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3151 } 3152 /* check coarse problem residual error */ 3153 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3154 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3155 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3156 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3157 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3161 /* get eigenvalue estimation if preonly has not been requested */ 3162 if (!ispreonly) { 3163 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3164 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3166 } 3167 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3168 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3169 } 3170 /* free memory */ 3171 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3172 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3173 PetscFunctionReturn(0); 3174 } 3175 3176 #undef __FUNCT__ 3177 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3178 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3179 { 3180 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3181 PC_IS* pcis = (PC_IS*)pc->data; 3182 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3183 PetscInt i,j,coarse_size; 3184 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3185 PetscErrorCode ierr; 3186 3187 PetscFunctionBegin; 3188 /* get indices in local ordering for vertices and constraints */ 3189 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3190 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3191 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3192 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3193 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3194 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3195 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3196 3197 /* Compute global number of coarse dofs */ 3198 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3199 3200 /* check numbering */ 3201 if (pcbddc->dbg_flag) { 3202 PetscScalar coarsesum,*array; 3203 3204 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3205 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3206 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3207 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3208 for (i=0;i<pcbddc->local_primal_size;i++) { 3209 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3210 } 3211 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3212 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3213 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3214 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3215 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3216 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3217 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3218 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3219 for (i=0;i<pcis->n;i++) { 3220 if (array[i] == 1.0) { 3221 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3222 } 3223 } 3224 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3225 for (i=0;i<pcis->n;i++) { 3226 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3227 } 3228 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3229 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3230 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3231 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3232 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3233 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3234 if (pcbddc->dbg_flag > 1) { 3235 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3236 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3237 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3238 for (i=0;i<pcbddc->local_primal_size;i++) { 3239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3240 } 3241 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3242 } 3243 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3244 } 3245 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3246 /* get back data */ 3247 *coarse_size_n = coarse_size; 3248 *local_primal_indices_n = local_primal_indices; 3249 PetscFunctionReturn(0); 3250 } 3251 3252