1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 pcbddc->n_ISForDofs = 0; 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "PCBDDCCreateWorkVectors" 111 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 112 { 113 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 114 PC_IS *pcis = (PC_IS*)pc->data; 115 VecType impVecType; 116 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 117 PetscErrorCode ierr; 118 119 PetscFunctionBegin; 120 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 121 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 122 local_primal_size = n_constraints+n_vertices; 123 n_R = pcis->n-n_vertices; 124 /* local work vectors */ 125 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 126 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 127 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 128 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 129 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 130 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 131 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 132 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 133 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 134 if (n_constraints) { 135 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 136 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 137 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 138 } 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetUpCorrection" 144 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 145 { 146 PetscErrorCode ierr; 147 /* pointers to pcis and pcbddc */ 148 PC_IS* pcis = (PC_IS*)pc->data; 149 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 150 /* submatrices of local problem */ 151 Mat A_RV,A_VR,A_VV; 152 /* working matrices */ 153 Mat M1,M2,M3,C_CR; 154 /* working vectors */ 155 Vec vec1_C,vec2_C,vec1_V,vec2_V; 156 /* additional working stuff */ 157 IS is_aux; 158 ISLocalToGlobalMapping BtoNmap; 159 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 160 const PetscScalar *array,*row_cmat_values; 161 const PetscInt *row_cmat_indices,*idx_R_local; 162 PetscInt *vertices,*idx_V_B,*auxindices; 163 PetscInt n_vertices,n_constraints,size_of_constraint; 164 PetscInt i,j,n_R,n_D,n_B; 165 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 166 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 167 MatType impMatType; 168 /* some shortcuts to scalars */ 169 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 170 /* for debugging purposes */ 171 PetscReal *coarsefunctions_errors,*constraints_errors; 172 173 PetscFunctionBegin; 174 /* get number of vertices and their local indices */ 175 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 176 n_constraints = pcbddc->local_primal_size-n_vertices; 177 /* Set Non-overlapping dimensions */ 178 n_B = pcis->n_B; n_D = pcis->n - n_B; 179 n_R = pcis->n-n_vertices; 180 181 /* Set types for local objects needed by BDDC precondtioner */ 182 impMatType = MATSEQDENSE; 183 184 /* Allocating some extra storage just to be safe */ 185 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 186 for (i=0;i<pcis->n;i++) auxindices[i]=i; 187 188 /* vertices in boundary numbering */ 189 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 190 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 191 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 193 if (i != n_vertices) { 194 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 195 } 196 197 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 198 if (n_constraints) { 199 /* work vectors */ 200 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 201 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 202 /* auxiliary matrices */ 203 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 204 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 205 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 206 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 207 208 /* Extract constraints on R nodes: C_{CR} */ 209 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 210 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 211 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 212 213 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 214 for (i=0;i<n_constraints;i++) { 215 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 216 /* Get row of constraint matrix in R numbering */ 217 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 218 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 219 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 220 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 221 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 222 /* Solve for row of constraint matrix in R numbering */ 223 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 224 /* Set values in local_auxmat2 */ 225 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 226 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 227 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 228 } 229 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 230 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 231 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 232 233 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 234 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 235 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 236 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 237 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 238 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 239 ierr = MatSetUp(M1);CHKERRQ(ierr); 240 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 241 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 242 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 243 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 244 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 245 ierr = MatDestroy(&M2);CHKERRQ(ierr); 246 ierr = MatDestroy(&M3);CHKERRQ(ierr); 247 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 248 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 249 } 250 251 /* Get submatrices from subdomain matrix */ 252 if (n_vertices) { 253 PetscInt ibs,mbs; 254 PetscBool issbaij; 255 Mat newmat; 256 257 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 258 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 259 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 260 if (ibs != mbs) { /* need to convert to SEQAIJ */ 261 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 262 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 263 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 265 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 266 } else { 267 /* this is safe */ 268 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 269 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 270 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 271 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 272 /* which of the two approaches is faster? */ 273 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 274 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 275 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 276 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 277 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 278 } else { 279 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 280 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 281 } 282 } 283 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 284 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 285 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 286 } 287 288 /* Matrix of coarse basis functions (local) */ 289 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 290 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 291 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 292 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 293 if (pcbddc->switch_static || pcbddc->dbg_flag) { 294 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 295 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 296 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 297 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 298 } 299 300 if (pcbddc->dbg_flag) { 301 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 302 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 303 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 304 } 305 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 306 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 307 308 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 309 310 /* vertices */ 311 for (i=0;i<n_vertices;i++) { 312 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 313 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 314 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 315 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 316 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 317 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 318 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 319 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 320 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 321 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 322 if (n_constraints) { 323 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 324 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 325 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 326 } 327 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 328 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 329 330 /* Set values in coarse basis function and subdomain part of coarse_mat */ 331 /* coarse basis functions */ 332 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 333 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 334 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 335 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 336 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 337 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 338 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 339 if (pcbddc->switch_static || pcbddc->dbg_flag) { 340 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 341 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 342 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 343 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 344 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 345 } 346 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 347 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 348 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 if (n_constraints) { 351 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 352 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 353 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 } 355 356 /* check */ 357 if (pcbddc->dbg_flag) { 358 /* assemble subdomain vector on local nodes */ 359 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 360 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 361 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 362 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 363 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 364 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 365 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 366 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 367 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 368 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 369 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 370 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 371 if (n_constraints) { 372 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 373 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 374 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 375 } 376 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 378 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 379 /* check saddle point solution */ 380 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 381 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 382 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 383 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 384 /* shift by the identity matrix */ 385 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 386 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 387 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 388 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 389 } 390 } 391 392 /* constraints */ 393 for (i=0;i<n_constraints;i++) { 394 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 395 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 396 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 397 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 398 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 399 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 400 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 401 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 402 if (n_vertices) { 403 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 404 } 405 /* Set values in coarse basis function and subdomain part of coarse_mat */ 406 /* coarse basis functions */ 407 j = i+n_vertices; /* don't touch this! */ 408 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 409 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 410 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 411 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 412 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 414 if (pcbddc->switch_static || pcbddc->dbg_flag) { 415 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 416 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 417 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 418 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 420 } 421 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 422 if (n_vertices) { 423 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 424 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 425 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 426 } 427 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 428 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 430 431 if (pcbddc->dbg_flag) { 432 /* assemble subdomain vector on nodes */ 433 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 434 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 437 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 439 /* assemble subdomain vector of lagrange multipliers */ 440 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 441 if (n_vertices) { 442 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 443 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 444 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 445 } 446 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 447 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 448 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 449 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 451 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 452 /* check saddle point solution */ 453 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 454 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 455 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 456 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 457 /* shift by the identity matrix */ 458 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 459 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 460 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 461 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 462 } 463 } 464 /* call assembling routines for local coarse basis */ 465 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 if (pcbddc->switch_static || pcbddc->dbg_flag) { 468 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 469 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 470 } 471 472 /* compute other basis functions for non-symmetric problems */ 473 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 474 if (!setsym || (setsym && !issym)) { 475 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 476 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 477 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 478 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 479 if (pcbddc->switch_static || pcbddc->dbg_flag) { 480 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 481 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 482 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 483 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 484 } 485 for (i=0;i<pcbddc->local_primal_size;i++) { 486 if (n_constraints) { 487 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 488 for (j=0;j<n_constraints;j++) { 489 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 490 } 491 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 492 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 493 } 494 if (i<n_vertices) { 495 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 496 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 499 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 500 if (n_constraints) { 501 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 502 } 503 } else { 504 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 505 } 506 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 507 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 508 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 509 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 510 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 511 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 512 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 513 if (i<n_vertices) { 514 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 515 } 516 if (pcbddc->switch_static || pcbddc->dbg_flag) { 517 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 518 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 519 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 520 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 521 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 522 } 523 524 if (pcbddc->dbg_flag) { 525 /* assemble subdomain vector on nodes */ 526 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 527 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 528 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 529 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 530 if (i<n_vertices) { 531 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 532 } 533 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 534 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 535 /* assemble subdomain vector of lagrange multipliers */ 536 for (j=0;j<pcbddc->local_primal_size;j++) { 537 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 538 } 539 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 540 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 541 /* check saddle point solution */ 542 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 543 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 544 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 545 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 546 /* shift by the identity matrix */ 547 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 548 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 549 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 550 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 551 } 552 } 553 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 if (pcbddc->switch_static || pcbddc->dbg_flag) { 556 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 } 559 unsymmetric_check = PETSC_TRUE; 560 } else { /* take references to already computed coarse basis */ 561 unsymmetric_check = PETSC_FALSE; 562 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 563 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 564 if (pcbddc->coarse_phi_D) { 565 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 566 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 567 } 568 } 569 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 570 /* Checking coarse_sub_mat and coarse basis functios */ 571 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 572 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 573 if (pcbddc->dbg_flag) { 574 Mat coarse_sub_mat; 575 Mat AUXMAT,TM1,TM2,TM3,TM4; 576 Mat coarse_phi_D,coarse_phi_B; 577 Mat coarse_psi_D,coarse_psi_B; 578 Mat A_II,A_BB,A_IB,A_BI; 579 MatType checkmattype=MATSEQAIJ; 580 PetscReal real_value; 581 582 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 583 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 584 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 585 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 586 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 587 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 588 if (unsymmetric_check) { 589 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 590 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 591 } 592 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 593 594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 596 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 597 if (unsymmetric_check) { 598 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 599 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 600 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 601 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 602 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 603 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 604 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 605 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 607 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 608 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 609 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 610 } else { 611 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 612 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 613 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 614 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 615 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 616 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 617 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 618 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 619 } 620 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 621 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 622 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 623 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 624 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 625 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 630 for (i=0;i<pcbddc->local_primal_size;i++) { 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 632 } 633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 634 for (i=0;i<pcbddc->local_primal_size;i++) { 635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 636 } 637 if (unsymmetric_check) { 638 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 639 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 641 } 642 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 643 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 644 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 645 } 646 } 647 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 648 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 649 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 650 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 651 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 652 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 653 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 654 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 655 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 656 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 657 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 658 if (unsymmetric_check) { 659 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 660 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 661 } 662 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 663 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 664 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 665 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 666 } 667 /* free memory */ 668 if (n_vertices) { 669 ierr = PetscFree(vertices);CHKERRQ(ierr); 670 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 671 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 672 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 673 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 674 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 675 } 676 if (n_constraints) { 677 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 678 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 679 ierr = MatDestroy(&M1);CHKERRQ(ierr); 680 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 681 } 682 ierr = PetscFree(auxindices);CHKERRQ(ierr); 683 /* get back data */ 684 *coarse_submat_vals_n = coarse_submat_vals; 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 690 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 691 { 692 PC_IS* pcis = (PC_IS*)(pc->data); 693 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 694 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 695 PetscBool issbaij,isbaij; 696 /* manage repeated solves */ 697 MatReuse reuse; 698 MatStructure matstruct; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 /* get mat flags */ 703 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 704 reuse = MAT_INITIAL_MATRIX; 705 if (pc->setupcalled) { 706 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 707 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 708 if (matstruct == SAME_NONZERO_PATTERN) { 709 reuse = MAT_REUSE_MATRIX; 710 } else { 711 reuse = MAT_INITIAL_MATRIX; 712 } 713 } 714 if (reuse == MAT_INITIAL_MATRIX) { 715 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 717 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 720 } 721 722 /* transform local matrices if needed */ 723 if (pcbddc->use_change_of_basis) { 724 Mat change_mat_all; 725 PetscScalar *row_cmat_values; 726 PetscInt *row_cmat_indices; 727 PetscInt *nnz,*is_indices,*temp_indices; 728 PetscInt i,j,k,n_D,n_B; 729 730 /* Get Non-overlapping dimensions */ 731 n_B = pcis->n_B; 732 n_D = pcis->n-n_B; 733 734 /* compute nonzero structure of change of basis on all local nodes */ 735 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 736 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 737 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 738 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 739 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 740 k=1; 741 for (i=0;i<n_B;i++) { 742 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 743 nnz[is_indices[i]]=j; 744 if (k < j) k = j; 745 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 746 } 747 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 748 /* assemble change of basis matrix on the whole set of local dofs */ 749 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 750 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 751 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 752 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 753 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 754 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 755 for (i=0;i<n_D;i++) { 756 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 757 } 758 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 759 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 760 for (i=0;i<n_B;i++) { 761 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 762 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 763 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 764 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 765 } 766 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 768 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 769 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 770 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 771 if (!issbaij && !isbaij) { 772 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 773 } else { 774 Mat work_mat; 775 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 776 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 777 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 778 } 779 /* 780 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 781 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 782 */ 783 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 784 ierr = PetscFree(nnz);CHKERRQ(ierr); 785 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 786 } else { 787 /* without change of basis, the local matrix is unchanged */ 788 if (!pcbddc->local_mat) { 789 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 790 pcbddc->local_mat = matis->A; 791 } 792 } 793 794 /* get submatrices */ 795 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 796 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 797 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 798 if (!issbaij) { 799 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 800 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 801 } else { 802 Mat newmat; 803 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 804 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 805 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 806 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 807 } 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 813 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 814 { 815 PC_IS* pcis = (PC_IS*)(pc->data); 816 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 817 IS is_aux1,is_aux2; 818 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 819 PetscInt n_vertices,i,j,n_R,n_D,n_B; 820 PetscInt vbs,bs; 821 PetscBT bitmask; 822 PetscErrorCode ierr; 823 824 PetscFunctionBegin; 825 /* Set Non-overlapping dimensions */ 826 n_B = pcis->n_B; n_D = pcis->n - n_B; 827 /* get vertex indices from constraint matrix */ 828 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 829 830 /* create auxiliary bitmask */ 831 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 832 for (i=0;i<n_vertices;i++) { 833 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 834 } 835 836 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 837 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 838 for (i=0, n_R=0; i<pcis->n; i++) { 839 if (!PetscBTLookup(bitmask,i)) { 840 idx_R_local[n_R] = i; 841 n_R++; 842 } 843 } 844 845 /* Block code */ 846 vbs = 1; 847 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 848 if (bs>1 && !(n_vertices%bs)) { 849 PetscBool is_blocked = PETSC_TRUE; 850 PetscInt *vary; 851 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 852 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 853 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 854 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 855 for (i=0; i<n_vertices; i++) { 856 if (vary[i]!=0 && vary[i]!=bs) { 857 is_blocked = PETSC_FALSE; 858 break; 859 } 860 } 861 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 862 vbs = bs; 863 for (i=0;i<n_R/vbs;i++) { 864 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 865 } 866 } 867 ierr = PetscFree(vary);CHKERRQ(ierr); 868 } 869 ierr = PetscFree(vertices);CHKERRQ(ierr); 870 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 871 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 872 873 /* print some info if requested */ 874 if (pcbddc->dbg_flag) { 875 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 876 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 878 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 880 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 881 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 882 } 883 884 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 885 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 886 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 887 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 888 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 889 for (i=0; i<n_D; i++) { 890 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 891 } 892 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 893 for (i=0, j=0; i<n_R; i++) { 894 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 895 aux_array1[j++] = i; 896 } 897 } 898 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 899 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 900 for (i=0, j=0; i<n_B; i++) { 901 if (!PetscBTLookup(bitmask,is_indices[i])) { 902 aux_array2[j++] = i; 903 } 904 } 905 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 906 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 907 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 908 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 909 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 910 911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 912 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 913 for (i=0, j=0; i<n_R; i++) { 914 if (PetscBTLookup(bitmask,idx_R_local[i])) { 915 aux_array1[j++] = i; 916 } 917 } 918 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 919 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 920 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 921 } 922 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 923 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } 926 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 930 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 931 { 932 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 933 PC_IS *pcis = (PC_IS*)pc->data; 934 PC pc_temp; 935 Mat A_RR; 936 Vec vec1,vec2,vec3; 937 MatStructure matstruct; 938 PetscScalar m_one = -1.0; 939 PetscReal value; 940 PetscInt n_D,n_R,ibs,mbs; 941 PetscBool use_exact,use_exact_reduced,issbaij; 942 PetscErrorCode ierr; 943 /* prefixes stuff */ 944 char dir_prefix[256],neu_prefix[256],str_level[3]; 945 size_t len; 946 947 PetscFunctionBegin; 948 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 949 950 /* compute prefixes */ 951 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 952 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 953 if (!pcbddc->current_level) { 954 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 955 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 956 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 957 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 958 } else { 959 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 960 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 961 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 962 len -= 15; /* remove "pc_bddc_coarse_" */ 963 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 964 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 965 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 966 *(dir_prefix+len)='\0'; 967 *(neu_prefix+len)='\0'; 968 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 969 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 970 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 971 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 972 } 973 974 /* DIRICHLET PROBLEM */ 975 /* Matrix for Dirichlet problem is pcis->A_II */ 976 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 977 if (!pcbddc->ksp_D) { /* create object if not yet build */ 978 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 979 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 980 /* default */ 981 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 982 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 983 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 984 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 985 if (issbaij) { 986 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 987 } else { 988 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 989 } 990 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 991 } 992 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 993 /* Allow user's customization */ 994 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 995 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 996 if (!n_D) { 997 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 998 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 999 } 1000 /* Set Up KSP for Dirichlet problem of BDDC */ 1001 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1002 /* set ksp_D into pcis data */ 1003 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1004 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1005 pcis->ksp_D = pcbddc->ksp_D; 1006 1007 /* NEUMANN PROBLEM */ 1008 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1009 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1010 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1011 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1012 if (ibs != mbs) { 1013 Mat newmat; 1014 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1015 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1016 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1017 } else { 1018 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1019 } 1020 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1021 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1022 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1023 /* default */ 1024 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1025 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1026 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1027 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1028 if (issbaij) { 1029 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1030 } else { 1031 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1032 } 1033 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1034 } 1035 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1036 /* Allow user's customization */ 1037 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1038 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1039 if (!n_R) { 1040 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1041 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1042 } 1043 /* Set Up KSP for Neumann problem of BDDC */ 1044 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1045 1046 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1047 1048 /* Dirichlet */ 1049 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1050 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1051 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1052 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1053 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1054 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1055 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1056 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1057 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1058 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1059 /* need to be adapted? */ 1060 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1061 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1062 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1063 /* print info */ 1064 if (pcbddc->dbg_flag) { 1065 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1070 } 1071 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1072 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1073 } 1074 1075 /* Neumann */ 1076 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1077 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1078 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1079 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1080 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1081 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1082 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1083 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1084 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1085 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1086 /* need to be adapted? */ 1087 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1088 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1089 /* print info */ 1090 if (pcbddc->dbg_flag) { 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1093 } 1094 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1095 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1096 } 1097 /* free Neumann problem's matrix */ 1098 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1104 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1105 { 1106 PetscErrorCode ierr; 1107 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1108 1109 PetscFunctionBegin; 1110 if (applytranspose) { 1111 if (pcbddc->local_auxmat1) { 1112 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1113 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1114 } 1115 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1116 } else { 1117 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1118 if (pcbddc->local_auxmat1) { 1119 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1120 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1121 } 1122 } 1123 PetscFunctionReturn(0); 1124 } 1125 1126 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1129 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1130 { 1131 PetscErrorCode ierr; 1132 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1133 PC_IS* pcis = (PC_IS*) (pc->data); 1134 const PetscScalar zero = 0.0; 1135 1136 PetscFunctionBegin; 1137 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1138 if (applytranspose) { 1139 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1140 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1141 } else { 1142 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1143 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1144 } 1145 /* Scatter data of coarse_rhs */ 1146 ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 1147 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1148 1149 /* Local solution on R nodes */ 1150 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1151 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1152 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1153 if (pcbddc->switch_static) { 1154 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1156 } 1157 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1158 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1159 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1160 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1161 if (pcbddc->switch_static) { 1162 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1163 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1164 } 1165 1166 /* Coarse solution */ 1167 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1168 /* TODO remove null space when doing multilevel */ 1169 if (applytranspose) { 1170 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1171 } else { 1172 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1173 } 1174 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1175 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1176 1177 /* Sum contributions from two levels */ 1178 if (applytranspose) { 1179 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1180 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1181 } else { 1182 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1183 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1190 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1191 { 1192 PetscErrorCode ierr; 1193 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1194 1195 PetscFunctionBegin; 1196 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1202 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1203 { 1204 PetscErrorCode ierr; 1205 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1206 1207 PetscFunctionBegin; 1208 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 /* uncomment for testing purposes */ 1213 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1214 #undef __FUNCT__ 1215 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1216 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1217 { 1218 PetscErrorCode ierr; 1219 PC_IS* pcis = (PC_IS*)(pc->data); 1220 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1221 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1222 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1223 MatType impMatType=MATSEQAIJ; 1224 /* one and zero */ 1225 PetscScalar one=1.0,zero=0.0; 1226 /* space to store constraints and their local indices */ 1227 PetscScalar *temp_quadrature_constraint; 1228 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1229 /* iterators */ 1230 PetscInt i,j,k,total_counts,temp_start_ptr; 1231 /* stuff to store connected components stored in pcbddc->mat_graph */ 1232 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1233 PetscInt n_ISForFaces,n_ISForEdges; 1234 /* near null space stuff */ 1235 MatNullSpace nearnullsp; 1236 const Vec *nearnullvecs; 1237 Vec *localnearnullsp; 1238 PetscBool nnsp_has_cnst; 1239 PetscInt nnsp_size; 1240 PetscScalar *array; 1241 /* BLAS integers */ 1242 PetscBLASInt lwork,lierr; 1243 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1244 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1245 /* LAPACK working arrays for SVD or POD */ 1246 PetscBool skip_lapack; 1247 PetscScalar *work; 1248 PetscReal *singular_vals; 1249 #if defined(PETSC_USE_COMPLEX) 1250 PetscReal *rwork; 1251 #endif 1252 #if defined(PETSC_MISSING_LAPACK_GESVD) 1253 PetscBLASInt Blas_one_2=1; 1254 PetscScalar *temp_basis,*correlation_mat; 1255 #else 1256 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1257 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1258 #endif 1259 /* change of basis */ 1260 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1261 PetscBool boolforchange,qr_needed; 1262 PetscBT touched,change_basis,qr_needed_idx; 1263 /* auxiliary stuff */ 1264 PetscInt *nnz,*is_indices,*local_to_B; 1265 /* some quantities */ 1266 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1267 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1268 1269 1270 PetscFunctionBegin; 1271 /* Get index sets for faces, edges and vertices from graph */ 1272 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1273 pcbddc->use_vertices = PETSC_TRUE; 1274 } 1275 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1276 /* HACK: provide functions to set change of basis */ 1277 if (!ISForVertices && pcbddc->NullSpace) { 1278 pcbddc->use_change_of_basis = PETSC_TRUE; 1279 pcbddc->use_change_on_faces = PETSC_FALSE; 1280 } 1281 /* print some info */ 1282 if (pcbddc->dbg_flag) { 1283 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1284 i = 0; 1285 if (ISForVertices) { 1286 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1287 } 1288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1289 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1290 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1291 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1292 } 1293 /* check if near null space is attached to global mat */ 1294 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1295 if (nearnullsp) { 1296 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1297 } else { /* if near null space is not provided BDDC uses constants by default */ 1298 nnsp_size = 0; 1299 nnsp_has_cnst = PETSC_TRUE; 1300 } 1301 /* get max number of constraints on a single cc */ 1302 max_constraints = nnsp_size; 1303 if (nnsp_has_cnst) max_constraints++; 1304 1305 /* 1306 Evaluate maximum storage size needed by the procedure 1307 - temp_indices will contain start index of each constraint stored as follows 1308 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1309 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1310 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1311 */ 1312 total_counts = n_ISForFaces+n_ISForEdges; 1313 total_counts *= max_constraints; 1314 n_vertices = 0; 1315 if (ISForVertices) { 1316 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1317 } 1318 total_counts += n_vertices; 1319 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1320 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1321 total_counts = 0; 1322 max_size_of_constraint = 0; 1323 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1324 if (i<n_ISForEdges) { 1325 used_IS = &ISForEdges[i]; 1326 } else { 1327 used_IS = &ISForFaces[i-n_ISForEdges]; 1328 } 1329 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1330 total_counts += j; 1331 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1332 } 1333 total_counts *= max_constraints; 1334 total_counts += n_vertices; 1335 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1336 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1337 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1338 /* local to boundary numbering */ 1339 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1340 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1341 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1342 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1343 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1344 /* get local part of global near null space vectors */ 1345 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1346 for (k=0;k<nnsp_size;k++) { 1347 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1348 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1349 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1350 } 1351 1352 /* whether or not to skip lapack calls */ 1353 skip_lapack = PETSC_TRUE; 1354 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1355 1356 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1357 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1358 PetscScalar temp_work; 1359 #if defined(PETSC_MISSING_LAPACK_GESVD) 1360 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1361 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1362 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1363 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1364 #if defined(PETSC_USE_COMPLEX) 1365 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1366 #endif 1367 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1368 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1369 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1370 lwork = -1; 1371 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1372 #if !defined(PETSC_USE_COMPLEX) 1373 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1374 #else 1375 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1376 #endif 1377 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1378 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1379 #else /* on missing GESVD */ 1380 /* SVD */ 1381 PetscInt max_n,min_n; 1382 max_n = max_size_of_constraint; 1383 min_n = max_constraints; 1384 if (max_size_of_constraint < max_constraints) { 1385 min_n = max_size_of_constraint; 1386 max_n = max_constraints; 1387 } 1388 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1389 #if defined(PETSC_USE_COMPLEX) 1390 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1391 #endif 1392 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1393 lwork = -1; 1394 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1395 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1396 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1397 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1398 #if !defined(PETSC_USE_COMPLEX) 1399 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1400 #else 1401 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1402 #endif 1403 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1404 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1405 #endif /* on missing GESVD */ 1406 /* Allocate optimal workspace */ 1407 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1408 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1409 } 1410 /* Now we can loop on constraining sets */ 1411 total_counts = 0; 1412 temp_indices[0] = 0; 1413 /* vertices */ 1414 if (ISForVertices) { 1415 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1416 if (nnsp_has_cnst) { /* consider all vertices */ 1417 for (i=0;i<n_vertices;i++) { 1418 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1419 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1420 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1421 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1422 total_counts++; 1423 } 1424 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1425 PetscBool used_vertex; 1426 for (i=0;i<n_vertices;i++) { 1427 used_vertex = PETSC_FALSE; 1428 k = 0; 1429 while (!used_vertex && k<nnsp_size) { 1430 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1431 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1432 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1433 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1434 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1435 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1436 total_counts++; 1437 used_vertex = PETSC_TRUE; 1438 } 1439 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1440 k++; 1441 } 1442 } 1443 } 1444 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1445 n_vertices = total_counts; 1446 } 1447 1448 /* edges and faces */ 1449 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1450 if (i<n_ISForEdges) { 1451 used_IS = &ISForEdges[i]; 1452 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1453 } else { 1454 used_IS = &ISForFaces[i-n_ISForEdges]; 1455 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1456 } 1457 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1458 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1459 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1460 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1461 /* change of basis should not be performed on local periodic nodes */ 1462 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1463 if (nnsp_has_cnst) { 1464 PetscScalar quad_value; 1465 temp_constraints++; 1466 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1467 for (j=0;j<size_of_constraint;j++) { 1468 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1469 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1470 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1471 } 1472 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1473 total_counts++; 1474 } 1475 for (k=0;k<nnsp_size;k++) { 1476 PetscReal real_value; 1477 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1478 for (j=0;j<size_of_constraint;j++) { 1479 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1480 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1481 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1482 } 1483 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1484 /* check if array is null on the connected component */ 1485 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1486 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1487 if (real_value > 0.0) { /* keep indices and values */ 1488 temp_constraints++; 1489 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1490 total_counts++; 1491 } 1492 } 1493 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1494 valid_constraints = temp_constraints; 1495 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1496 if (!pcbddc->use_nnsp_true && temp_constraints) { 1497 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1498 1499 #if defined(PETSC_MISSING_LAPACK_GESVD) 1500 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1501 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1502 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1503 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1504 from that computed using LAPACKgesvd 1505 -> This is due to a different computation of eigenvectors in LAPACKheev 1506 -> The quality of the POD-computed basis will be the same */ 1507 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1508 /* Store upper triangular part of correlation matrix */ 1509 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1510 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1511 for (j=0;j<temp_constraints;j++) { 1512 for (k=0;k<j+1;k++) { 1513 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1514 } 1515 } 1516 /* compute eigenvalues and eigenvectors of correlation matrix */ 1517 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1518 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1519 #if !defined(PETSC_USE_COMPLEX) 1520 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1521 #else 1522 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1523 #endif 1524 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1525 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1526 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1527 j=0; 1528 while (j < temp_constraints && singular_vals[j] < tol) j++; 1529 total_counts=total_counts-j; 1530 valid_constraints = temp_constraints-j; 1531 /* scale and copy POD basis into used quadrature memory */ 1532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1533 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1534 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1535 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1536 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1537 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1538 if (j<temp_constraints) { 1539 PetscInt ii; 1540 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1541 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1542 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1543 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1544 for (k=0;k<temp_constraints-j;k++) { 1545 for (ii=0;ii<size_of_constraint;ii++) { 1546 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1547 } 1548 } 1549 } 1550 #else /* on missing GESVD */ 1551 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1552 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1554 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1555 #if !defined(PETSC_USE_COMPLEX) 1556 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1557 #else 1558 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1559 #endif 1560 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1561 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1562 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1563 k = temp_constraints; 1564 if (k > size_of_constraint) k = size_of_constraint; 1565 j = 0; 1566 while (j < k && singular_vals[k-j-1] < tol) j++; 1567 total_counts = total_counts-temp_constraints+k-j; 1568 valid_constraints = k-j; 1569 #endif /* on missing GESVD */ 1570 } 1571 /* setting change_of_basis flag is safe now */ 1572 if (boolforchange) { 1573 for (j=0;j<valid_constraints;j++) { 1574 PetscBTSet(change_basis,total_counts-j-1); 1575 } 1576 } 1577 } 1578 /* free index sets of faces, edges and vertices */ 1579 for (i=0;i<n_ISForFaces;i++) { 1580 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1581 } 1582 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1583 for (i=0;i<n_ISForEdges;i++) { 1584 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1585 } 1586 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1587 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1588 1589 /* free workspace */ 1590 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1591 ierr = PetscFree(work);CHKERRQ(ierr); 1592 #if defined(PETSC_USE_COMPLEX) 1593 ierr = PetscFree(rwork);CHKERRQ(ierr); 1594 #endif 1595 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1596 #if defined(PETSC_MISSING_LAPACK_GESVD) 1597 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1598 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1599 #endif 1600 } 1601 for (k=0;k<nnsp_size;k++) { 1602 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1603 } 1604 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1605 1606 /* set quantities in pcbddc data structure */ 1607 /* n_vertices defines the number of subdomain corners in the primal space */ 1608 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1609 pcbddc->local_primal_size = total_counts; 1610 pcbddc->n_vertices = n_vertices; 1611 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1612 1613 /* Create constraint matrix */ 1614 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1615 /* so we need to set it up properly either with or without change of basis */ 1616 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1617 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1618 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1619 /* array to compute a local numbering of constraints : vertices first then constraints */ 1620 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1621 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1622 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1623 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1624 /* auxiliary stuff for basis change */ 1625 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1626 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1627 1628 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1629 total_primal_vertices=0; 1630 for (i=0;i<pcbddc->local_primal_size;i++) { 1631 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1632 if (size_of_constraint == 1) { 1633 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1634 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1635 aux_primal_minloc[total_primal_vertices]=0; 1636 total_primal_vertices++; 1637 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1638 PetscInt min_loc,min_index; 1639 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1640 /* find first untouched local node */ 1641 k = 0; 1642 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1643 min_index = global_indices[k]; 1644 min_loc = k; 1645 /* search the minimum among global nodes already untouched on the cc */ 1646 for (k=1;k<size_of_constraint;k++) { 1647 /* there can be more than one constraint on a single connected component */ 1648 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1649 min_index = global_indices[k]; 1650 min_loc = k; 1651 } 1652 } 1653 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1654 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1655 aux_primal_minloc[total_primal_vertices]=min_loc; 1656 total_primal_vertices++; 1657 } 1658 } 1659 /* determine if a QR strategy is needed for change of basis */ 1660 qr_needed = PETSC_FALSE; 1661 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1662 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1663 if (PetscBTLookup(change_basis,i)) { 1664 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1665 j = 0; 1666 for (k=0;k<size_of_constraint;k++) { 1667 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1668 j++; 1669 } 1670 } 1671 /* found more than one primal dof on the cc */ 1672 if (j > 1) { 1673 PetscBTSet(qr_needed_idx,i); 1674 qr_needed = PETSC_TRUE; 1675 } 1676 } 1677 } 1678 /* free workspace */ 1679 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1680 1681 /* permute indices in order to have a sorted set of vertices */ 1682 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1683 1684 /* nonzero structure of constraint matrix */ 1685 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1686 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1687 j=total_primal_vertices; 1688 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1689 if (!PetscBTLookup(change_basis,i)) { 1690 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1691 j++; 1692 } 1693 } 1694 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1695 ierr = PetscFree(nnz);CHKERRQ(ierr); 1696 /* set values in constraint matrix */ 1697 for (i=0;i<total_primal_vertices;i++) { 1698 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1699 } 1700 total_counts = total_primal_vertices; 1701 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1702 if (!PetscBTLookup(change_basis,i)) { 1703 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1704 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1705 total_counts++; 1706 } 1707 } 1708 /* assembling */ 1709 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1710 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1711 /* 1712 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1713 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1714 */ 1715 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1716 if (pcbddc->use_change_of_basis) { 1717 /* dual and primal dofs on a single cc */ 1718 PetscInt dual_dofs,primal_dofs; 1719 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1720 PetscInt primal_counter; 1721 /* working stuff for GEQRF */ 1722 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1723 PetscBLASInt lqr_work; 1724 /* working stuff for UNGQR */ 1725 PetscScalar *gqr_work,lgqr_work_t; 1726 PetscBLASInt lgqr_work; 1727 /* working stuff for TRTRS */ 1728 PetscScalar *trs_rhs; 1729 PetscBLASInt Blas_NRHS; 1730 /* pointers for values insertion into change of basis matrix */ 1731 PetscInt *start_rows,*start_cols; 1732 PetscScalar *start_vals; 1733 /* working stuff for values insertion */ 1734 PetscBT is_primal; 1735 1736 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1737 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1738 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1739 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1740 /* work arrays */ 1741 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1742 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1743 /* nonzeros per row */ 1744 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1745 if (PetscBTLookup(change_basis,i)) { 1746 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1747 if (PetscBTLookup(qr_needed_idx,i)) { 1748 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1749 } else { 1750 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1751 /* get local primal index on the cc */ 1752 j = 0; 1753 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1754 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1755 } 1756 } 1757 } 1758 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1759 ierr = PetscFree(nnz);CHKERRQ(ierr); 1760 /* Set initial identity in the matrix */ 1761 for (i=0;i<pcis->n_B;i++) { 1762 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1763 } 1764 1765 if (pcbddc->dbg_flag) { 1766 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1767 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1768 } 1769 1770 1771 /* Now we loop on the constraints which need a change of basis */ 1772 /* 1773 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1774 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1775 1776 Basic blocks of change of basis matrix T computed 1777 1778 - Using the following block transformation if there is only a primal dof on the cc 1779 (in the example, primal dof is the last one of the edge in LOCAL ordering 1780 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1781 | 1 0 ... 0 1 | 1782 | 0 1 ... 0 1 | 1783 | ... | 1784 | 0 ... 1 1 | 1785 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1786 1787 - via QR decomposition of constraints otherwise 1788 */ 1789 if (qr_needed) { 1790 /* space to store Q */ 1791 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1792 /* first we issue queries for optimal work */ 1793 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1794 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1795 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1796 lqr_work = -1; 1797 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1798 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1799 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1800 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1801 lgqr_work = -1; 1802 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1803 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1804 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1805 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1806 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1807 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1808 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1809 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1810 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1811 /* array to store scaling factors for reflectors */ 1812 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1813 /* array to store rhs and solution of triangular solver */ 1814 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1815 /* allocating workspace for check */ 1816 if (pcbddc->dbg_flag) { 1817 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1818 } 1819 } 1820 /* array to store whether a node is primal or not */ 1821 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1822 for (i=0;i<total_primal_vertices;i++) { 1823 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1824 } 1825 1826 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1827 /* -> using implicit ordering contained in temp_indices data */ 1828 total_counts = pcbddc->n_vertices; 1829 primal_counter = total_counts; 1830 while (total_counts<pcbddc->local_primal_size) { 1831 primal_dofs = 1; 1832 if (PetscBTLookup(change_basis,total_counts)) { 1833 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1834 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1835 primal_dofs++; 1836 } 1837 /* get constraint info */ 1838 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1839 dual_dofs = size_of_constraint-primal_dofs; 1840 1841 if (pcbddc->dbg_flag) { 1842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1843 } 1844 1845 if (primal_dofs > 1) { /* QR */ 1846 1847 /* copy quadrature constraints for change of basis check */ 1848 if (pcbddc->dbg_flag) { 1849 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1850 } 1851 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1852 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1853 1854 /* compute QR decomposition of constraints */ 1855 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1856 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1857 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1858 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1859 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1860 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1861 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1862 1863 /* explictly compute R^-T */ 1864 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1865 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1866 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1867 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1868 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1869 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1870 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1871 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1872 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1873 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1874 1875 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1876 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1877 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1878 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1879 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1880 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1881 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1882 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1883 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1884 1885 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1886 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1887 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1888 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1889 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1890 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1891 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1892 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1893 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1894 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1895 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1896 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1897 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1898 1899 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1900 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1901 /* insert cols for primal dofs */ 1902 for (j=0;j<primal_dofs;j++) { 1903 start_vals = &qr_basis[j*size_of_constraint]; 1904 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1905 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1906 } 1907 /* insert cols for dual dofs */ 1908 for (j=0,k=0;j<dual_dofs;k++) { 1909 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1910 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1911 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1912 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1913 j++; 1914 } 1915 } 1916 1917 /* check change of basis */ 1918 if (pcbddc->dbg_flag) { 1919 PetscInt ii,jj; 1920 PetscBool valid_qr=PETSC_TRUE; 1921 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1922 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1923 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1924 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1925 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1926 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1927 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1928 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1930 for (jj=0;jj<size_of_constraint;jj++) { 1931 for (ii=0;ii<primal_dofs;ii++) { 1932 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1933 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1934 } 1935 } 1936 if (!valid_qr) { 1937 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1938 for (jj=0;jj<size_of_constraint;jj++) { 1939 for (ii=0;ii<primal_dofs;ii++) { 1940 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1941 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1942 } 1943 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1944 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1945 } 1946 } 1947 } 1948 } else { 1949 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1950 } 1951 } 1952 } else { /* simple transformation block */ 1953 PetscInt row,col; 1954 PetscScalar val; 1955 for (j=0;j<size_of_constraint;j++) { 1956 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1957 if (!PetscBTLookup(is_primal,row)) { 1958 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1959 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1960 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1961 } else { 1962 for (k=0;k<size_of_constraint;k++) { 1963 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1964 if (row != col) { 1965 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1966 } else { 1967 val = 1.0; 1968 } 1969 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1970 } 1971 } 1972 } 1973 } 1974 /* increment primal counter */ 1975 primal_counter += primal_dofs; 1976 } else { 1977 if (pcbddc->dbg_flag) { 1978 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1979 } 1980 } 1981 /* increment constraint counter total_counts */ 1982 total_counts += primal_dofs; 1983 } 1984 1985 /* free workspace */ 1986 if (qr_needed) { 1987 if (pcbddc->dbg_flag) { 1988 ierr = PetscFree(work);CHKERRQ(ierr); 1989 } 1990 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1991 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1992 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1993 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1994 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1995 } 1996 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1997 /* assembling */ 1998 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1999 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2000 /* 2001 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2002 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2003 */ 2004 } 2005 2006 /* flush dbg viewer */ 2007 if (pcbddc->dbg_flag) { 2008 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2009 } 2010 2011 /* free workspace */ 2012 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2013 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2014 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2015 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2016 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2017 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2018 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2019 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2020 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2021 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2022 PetscFunctionReturn(0); 2023 } 2024 2025 #undef __FUNCT__ 2026 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2027 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2028 { 2029 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2030 PC_IS *pcis = (PC_IS*)pc->data; 2031 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2032 PetscInt bs,ierr,i,vertex_size; 2033 PetscViewer viewer=pcbddc->dbg_viewer; 2034 2035 PetscFunctionBegin; 2036 /* Init local Graph struct */ 2037 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2038 2039 /* Check validity of the csr graph passed in by the user */ 2040 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2041 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2042 } 2043 2044 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2045 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2046 Mat mat_adj; 2047 const PetscInt *xadj,*adjncy; 2048 PetscBool flg_row=PETSC_TRUE; 2049 2050 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2051 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2052 if (!flg_row) { 2053 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2054 } 2055 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2056 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2057 if (!flg_row) { 2058 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2059 } 2060 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2061 } 2062 2063 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2064 vertex_size = 1; 2065 if (!pcbddc->user_provided_isfordofs) { 2066 if (!pcbddc->n_ISForDofs) { 2067 IS *custom_ISForDofs; 2068 2069 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2070 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2071 for (i=0;i<bs;i++) { 2072 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2073 } 2074 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2075 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2076 /* remove my references to IS objects */ 2077 for (i=0;i<bs;i++) { 2078 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2079 } 2080 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2081 } 2082 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2083 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2084 } 2085 2086 /* Setup of Graph */ 2087 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2088 2089 /* Graph's connected components analysis */ 2090 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2091 2092 /* print some info to stdout */ 2093 if (pcbddc->dbg_flag) { 2094 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2095 } 2096 PetscFunctionReturn(0); 2097 } 2098 2099 #undef __FUNCT__ 2100 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2101 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2102 { 2103 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2104 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2105 PetscErrorCode ierr; 2106 2107 PetscFunctionBegin; 2108 n = 0; 2109 vertices = 0; 2110 if (pcbddc->ConstraintMatrix) { 2111 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2112 for (i=0;i<local_primal_size;i++) { 2113 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2114 if (size_of_constraint == 1) n++; 2115 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2116 } 2117 if (vertices_idx) { 2118 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2119 n = 0; 2120 for (i=0;i<local_primal_size;i++) { 2121 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2122 if (size_of_constraint == 1) { 2123 vertices[n++]=row_cmat_indices[0]; 2124 } 2125 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2126 } 2127 } 2128 } 2129 *n_vertices = n; 2130 if (vertices_idx) *vertices_idx = vertices; 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2136 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2137 { 2138 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2139 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2140 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2141 PetscBT touched; 2142 PetscErrorCode ierr; 2143 2144 /* This function assumes that the number of local constraints per connected component 2145 is not greater than the number of nodes defined for the connected component 2146 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2147 PetscFunctionBegin; 2148 n = 0; 2149 constraints_index = 0; 2150 if (pcbddc->ConstraintMatrix) { 2151 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2152 max_size_of_constraint = 0; 2153 for (i=0;i<local_primal_size;i++) { 2154 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2155 if (size_of_constraint > 1) { 2156 n++; 2157 } 2158 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2159 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2160 } 2161 if (constraints_idx) { 2162 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2163 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2164 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2165 n = 0; 2166 for (i=0;i<local_primal_size;i++) { 2167 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2168 if (size_of_constraint > 1) { 2169 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2170 /* find first untouched local node */ 2171 j = 0; 2172 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2173 min_index = row_cmat_global_indices[j]; 2174 min_loc = j; 2175 /* search the minimum among nodes not yet touched on the connected component 2176 since there can be more than one constraint on a single cc */ 2177 for (j=1;j<size_of_constraint;j++) { 2178 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2179 min_index = row_cmat_global_indices[j]; 2180 min_loc = j; 2181 } 2182 } 2183 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2184 constraints_index[n++] = row_cmat_indices[min_loc]; 2185 } 2186 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2187 } 2188 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2189 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2190 } 2191 } 2192 *n_constraints = n; 2193 if (constraints_idx) *constraints_idx = constraints_index; 2194 PetscFunctionReturn(0); 2195 } 2196 2197 /* the next two functions has been adapted from pcis.c */ 2198 #undef __FUNCT__ 2199 #define __FUNCT__ "PCBDDCApplySchur" 2200 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2201 { 2202 PetscErrorCode ierr; 2203 PC_IS *pcis = (PC_IS*)(pc->data); 2204 2205 PetscFunctionBegin; 2206 if (!vec2_B) { vec2_B = v; } 2207 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2208 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2209 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2210 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2211 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2212 PetscFunctionReturn(0); 2213 } 2214 2215 #undef __FUNCT__ 2216 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2217 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2218 { 2219 PetscErrorCode ierr; 2220 PC_IS *pcis = (PC_IS*)(pc->data); 2221 2222 PetscFunctionBegin; 2223 if (!vec2_B) { vec2_B = v; } 2224 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2225 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2226 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2227 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2228 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "PCBDDCSubsetNumbering" 2234 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2235 { 2236 Vec local_vec,global_vec; 2237 IS seqis,paris; 2238 VecScatter scatter_ctx; 2239 PetscScalar *array; 2240 PetscInt *temp_global_dofs; 2241 PetscScalar globalsum; 2242 PetscInt i,j,s; 2243 PetscInt nlocals,first_index,old_index,max_local; 2244 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2245 PetscMPIInt *dof_sizes,*dof_displs; 2246 PetscBool first_found; 2247 PetscErrorCode ierr; 2248 2249 PetscFunctionBegin; 2250 /* mpi buffers */ 2251 MPI_Comm_size(comm,&size_prec_comm); 2252 MPI_Comm_rank(comm,&rank_prec_comm); 2253 j = ( !rank_prec_comm ? size_prec_comm : 0); 2254 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2255 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2256 /* get maximum size of subset */ 2257 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2258 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2259 max_local = 0; 2260 if (n_local_dofs) { 2261 max_local = temp_global_dofs[0]; 2262 for (i=1;i<n_local_dofs;i++) { 2263 if (max_local < temp_global_dofs[i] ) { 2264 max_local = temp_global_dofs[i]; 2265 } 2266 } 2267 } 2268 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2269 max_global++; 2270 max_local = 0; 2271 if (n_local_dofs) { 2272 max_local = local_dofs[0]; 2273 for (i=1;i<n_local_dofs;i++) { 2274 if (max_local < local_dofs[i] ) { 2275 max_local = local_dofs[i]; 2276 } 2277 } 2278 } 2279 max_local++; 2280 /* allocate workspace */ 2281 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2282 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2283 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2284 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2285 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2286 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2287 /* create scatter */ 2288 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2289 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2290 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2291 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2292 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2293 /* init array */ 2294 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2295 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2296 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2297 if (local_dofs_mult) { 2298 for (i=0;i<n_local_dofs;i++) { 2299 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2300 } 2301 } else { 2302 for (i=0;i<n_local_dofs;i++) { 2303 array[local_dofs[i]]=1.0; 2304 } 2305 } 2306 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2307 /* scatter into global vec and get total number of global dofs */ 2308 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2309 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2310 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2311 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2312 /* Fill global_vec with cumulative function for global numbering */ 2313 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2314 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2315 nlocals = 0; 2316 first_index = -1; 2317 first_found = PETSC_FALSE; 2318 for (i=0;i<s;i++) { 2319 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2320 first_found = PETSC_TRUE; 2321 first_index = i; 2322 } 2323 nlocals += (PetscInt)PetscRealPart(array[i]); 2324 } 2325 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2326 if (!rank_prec_comm) { 2327 dof_displs[0]=0; 2328 for (i=1;i<size_prec_comm;i++) { 2329 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2330 } 2331 } 2332 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2333 if (first_found) { 2334 array[first_index] += (PetscScalar)nlocals; 2335 old_index = first_index; 2336 for (i=first_index+1;i<s;i++) { 2337 if (PetscRealPart(array[i]) > 0.0) { 2338 array[i] += array[old_index]; 2339 old_index = i; 2340 } 2341 } 2342 } 2343 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2344 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2345 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2346 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2347 /* get global ordering of local dofs */ 2348 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2349 if (local_dofs_mult) { 2350 for (i=0;i<n_local_dofs;i++) { 2351 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2352 } 2353 } else { 2354 for (i=0;i<n_local_dofs;i++) { 2355 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2356 } 2357 } 2358 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2359 /* free workspace */ 2360 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2361 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2362 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2363 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2364 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2365 /* return pointer to global ordering of local dofs */ 2366 *global_numbering_subset = temp_global_dofs; 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2372 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2373 { 2374 PetscInt i,j; 2375 PetscScalar *alphas; 2376 PetscErrorCode ierr; 2377 2378 PetscFunctionBegin; 2379 /* this implements stabilized Gram-Schmidt */ 2380 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2381 for (i=0;i<n;i++) { 2382 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2383 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2384 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2385 } 2386 ierr = PetscFree(alphas);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatConvert_IS_AIJ" 2393 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2394 { 2395 Mat new_mat; 2396 Mat_IS *matis = (Mat_IS*)(mat->data); 2397 /* info on mat */ 2398 PetscInt bs,rows,cols; 2399 PetscInt lrows,lcols; 2400 PetscInt local_rows,local_cols; 2401 PetscMPIInt nsubdomains; 2402 /* preallocation */ 2403 Vec vec_dnz,vec_onz; 2404 PetscScalar *my_dnz,*my_onz,*array; 2405 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2406 PetscInt index_row,index_col,owner; 2407 PetscInt *local_indices,*global_indices; 2408 /* work */ 2409 PetscInt i,j; 2410 PetscErrorCode ierr; 2411 2412 PetscFunctionBegin; 2413 /* CHECKS */ 2414 /* get info from mat */ 2415 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2416 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2417 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2418 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2419 2420 /* MAT_INITIAL_MATRIX */ 2421 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2422 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2423 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2424 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2425 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2426 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2427 2428 /* 2429 preallocation 2430 */ 2431 2432 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2433 /* 2434 Some vectors are needed to sum up properly on shared interface dofs. 2435 Note that preallocation is not exact, since it overestimates nonzeros 2436 */ 2437 /* 2438 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2439 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2440 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2441 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2442 */ 2443 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2444 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2445 /* All processes need to compute entire row ownership */ 2446 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2447 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2448 for (i=0;i<nsubdomains;i++) { 2449 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2450 row_ownership[j]=i; 2451 } 2452 } 2453 /* map indices of local mat to global values */ 2454 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2455 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2456 for (i=0;i<local_rows;i++) local_indices[i]=i; 2457 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2458 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2459 2460 /* 2461 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2462 then, they will be summed up properly. This way, preallocation is always sufficient 2463 */ 2464 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2465 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2466 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2467 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2468 for (i=0;i<local_rows;i++) { 2469 index_row = global_indices[i]; 2470 for (j=i;j<local_rows;j++) { 2471 owner = row_ownership[index_row]; 2472 index_col = global_indices[j]; 2473 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2474 my_dnz[i] += 1.0; 2475 } else { /* offdiag block */ 2476 my_onz[i] += 1.0; 2477 } 2478 /* same as before, interchanging rows and cols */ 2479 if (i != j) { 2480 owner = row_ownership[index_col]; 2481 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2482 my_dnz[j] += 1.0; 2483 } else { 2484 my_onz[j] += 1.0; 2485 } 2486 } 2487 } 2488 } 2489 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2490 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2491 if (local_rows) { /* multilevel guard */ 2492 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2493 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2494 } 2495 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2496 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2497 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2498 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2499 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2500 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2501 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2502 2503 /* set computed preallocation in dnz and onz */ 2504 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2505 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2506 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2507 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2508 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2509 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2510 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2511 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2512 2513 /* Resize preallocation if overestimated */ 2514 for (i=0;i<lrows;i++) { 2515 dnz[i] = PetscMin(dnz[i],lcols); 2516 onz[i] = PetscMin(onz[i],cols-lcols); 2517 } 2518 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2519 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2520 2521 /* 2522 Set values. Very Basic. 2523 */ 2524 for (i=0;i<local_rows;i++) { 2525 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2526 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2527 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2528 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2529 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2530 } 2531 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2532 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2533 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2534 2535 /* get back mat */ 2536 *M = new_mat; 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatISSubassemble_Private" 2542 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2543 { 2544 Mat subdomain_adj; 2545 IS new_ranks,ranks_send_to; 2546 MatPartitioning partitioner; 2547 Mat_IS *matis; 2548 PetscInt n_neighs,*neighs,*n_shared,**shared; 2549 PetscInt prank; 2550 PetscMPIInt size,rank,color; 2551 PetscInt *xadj,*adjncy,*oldranks; 2552 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2553 PetscInt i,j,n_subdomains,local_size,threshold=0; 2554 PetscErrorCode ierr; 2555 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2556 PetscSubcomm subcomm; 2557 2558 PetscFunctionBegin; 2559 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2560 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2561 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2562 2563 /* Get info on mapping */ 2564 matis = (Mat_IS*)(mat->data); 2565 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2566 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2567 2568 /* build local CSR graph of subdomains' connectivity */ 2569 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2570 xadj[0] = 0; 2571 xadj[1] = PetscMax(n_neighs-1,0); 2572 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2573 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2574 2575 if (threshold) { 2576 PetscInt* count,min_threshold; 2577 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2578 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2579 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2580 for (j=0;j<n_shared[i];j++) { 2581 count[shared[i][j]] += 1; 2582 } 2583 } 2584 /* adapt threshold since we dont want to lose significant connections */ 2585 min_threshold = n_neighs; 2586 for (i=1;i<n_neighs;i++) { 2587 for (j=0;j<n_shared[i];j++) { 2588 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2589 } 2590 } 2591 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2592 threshold = PetscMax(min_threshold+1,threshold); 2593 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2594 xadj[1] = 0; 2595 for (i=1;i<n_neighs;i++) { 2596 for (j=0;j<n_shared[i];j++) { 2597 if (count[shared[i][j]] < threshold) { 2598 adjncy[xadj[1]] = neighs[i]; 2599 adjncy_wgt[xadj[1]] = n_shared[i]; 2600 xadj[1]++; 2601 break; 2602 } 2603 } 2604 } 2605 ierr = PetscFree(count);CHKERRQ(ierr); 2606 } else { 2607 if (xadj[1]) { 2608 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2609 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2610 } 2611 } 2612 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2613 if (use_square) { 2614 for (i=0;i<xadj[1];i++) { 2615 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2616 } 2617 } 2618 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2619 2620 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2621 2622 /* 2623 Restrict work on active processes only. 2624 */ 2625 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2626 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2627 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2628 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2629 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2630 if (color) { 2631 ierr = PetscFree(xadj);CHKERRQ(ierr); 2632 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2633 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2634 } else { 2635 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2636 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2637 prank = rank; 2638 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2639 for (i=0;i<size;i++) { 2640 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2641 } 2642 for (i=0;i<xadj[1];i++) { 2643 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2644 } 2645 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2646 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2647 n_subdomains = (PetscInt)size; 2648 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2649 2650 /* Partition */ 2651 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2652 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2653 if (use_vwgt) { 2654 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2655 v_wgt[0] = local_size; 2656 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2657 } 2658 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2659 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2660 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2661 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2662 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2663 2664 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2665 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2666 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2667 /* clean up */ 2668 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2669 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2670 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2671 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2672 } 2673 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2674 2675 /* assemble parallel IS for sends */ 2676 i = 1; 2677 if (color) i=0; 2678 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2679 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2680 2681 /* get back IS */ 2682 *is_sends = ranks_send_to; 2683 PetscFunctionReturn(0); 2684 } 2685 2686 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2687 2688 #undef __FUNCT__ 2689 #define __FUNCT__ "MatISSubassemble" 2690 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2691 { 2692 Mat local_mat,new_mat; 2693 Mat_IS *matis; 2694 IS is_sends_internal; 2695 PetscInt rows,cols; 2696 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2697 PetscBool ismatis,isdense; 2698 ISLocalToGlobalMapping l2gmap; 2699 PetscInt* l2gmap_indices; 2700 const PetscInt* is_indices; 2701 MatType new_local_type; 2702 MatTypePrivate new_local_type_private; 2703 /* buffers */ 2704 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2705 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2706 /* MPI */ 2707 MPI_Comm comm; 2708 PetscMPIInt n_sends,n_recvs,commsize; 2709 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2710 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2711 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2712 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2713 PetscErrorCode ierr; 2714 2715 PetscFunctionBegin; 2716 /* checks */ 2717 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2718 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2719 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2720 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2721 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2722 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2723 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2724 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2725 PetscValidLogicalCollectiveInt(mat,bs,0); 2726 /* prepare IS for sending if not provided */ 2727 if (!is_sends) { 2728 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2729 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2730 } else { 2731 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2732 is_sends_internal = is_sends; 2733 } 2734 2735 /* get pointer of MATIS data */ 2736 matis = (Mat_IS*)mat->data; 2737 2738 /* get comm */ 2739 comm = PetscObjectComm((PetscObject)mat); 2740 2741 /* compute number of sends */ 2742 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2743 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2744 2745 /* compute number of receives */ 2746 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2747 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2748 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2749 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2750 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2751 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2752 ierr = PetscFree(iflags);CHKERRQ(ierr); 2753 2754 /* prepare send/receive buffers */ 2755 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2756 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2757 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2758 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2759 2760 /* Get data from local mat */ 2761 if (!isdense) { 2762 /* TODO: See below some guidelines on how to prepare the local buffers */ 2763 /* 2764 send_buffer_vals should contain the raw values of the local matrix 2765 send_buffer_idxs should contain: 2766 - MatType_PRIVATE type 2767 - PetscInt size_of_l2gmap 2768 - PetscInt global_row_indices[size_of_l2gmap] 2769 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2770 */ 2771 } else { 2772 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2773 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2774 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2775 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2776 send_buffer_idxs[1] = i; 2777 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2778 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2779 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2780 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2781 for (i=0;i<n_sends;i++) { 2782 ilengths_vals[is_indices[i]] = len*len; 2783 ilengths_idxs[is_indices[i]] = len+2; 2784 } 2785 } 2786 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2787 buf_size_idxs = 0; 2788 buf_size_vals = 0; 2789 for (i=0;i<n_recvs;i++) { 2790 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2791 buf_size_vals += (PetscInt)olengths_vals[i]; 2792 } 2793 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2794 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2795 2796 /* get new tags for clean communications */ 2797 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2798 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2799 2800 /* allocate for requests */ 2801 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2802 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2803 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2804 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2805 2806 /* communications */ 2807 ptr_idxs = recv_buffer_idxs; 2808 ptr_vals = recv_buffer_vals; 2809 for (i=0;i<n_recvs;i++) { 2810 source_dest = onodes[i]; 2811 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2812 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2813 ptr_idxs += olengths_idxs[i]; 2814 ptr_vals += olengths_vals[i]; 2815 } 2816 for (i=0;i<n_sends;i++) { 2817 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2818 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2819 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2820 } 2821 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2822 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2823 2824 /* assemble new l2g map */ 2825 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2826 ptr_idxs = recv_buffer_idxs; 2827 buf_size_idxs = 0; 2828 for (i=0;i<n_recvs;i++) { 2829 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2830 ptr_idxs += olengths_idxs[i]; 2831 } 2832 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2833 ptr_idxs = recv_buffer_idxs; 2834 buf_size_idxs = 0; 2835 for (i=0;i<n_recvs;i++) { 2836 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2837 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2838 ptr_idxs += olengths_idxs[i]; 2839 } 2840 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2841 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2842 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2843 2844 /* infer new local matrix type from received local matrices type */ 2845 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2846 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2847 new_local_type_private = MATAIJ_PRIVATE; 2848 new_local_type = MATSEQAIJ; 2849 if (n_recvs) { 2850 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2851 ptr_idxs = recv_buffer_idxs; 2852 for (i=0;i<n_recvs;i++) { 2853 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2854 new_local_type_private = MATAIJ_PRIVATE; 2855 break; 2856 } 2857 ptr_idxs += olengths_idxs[i]; 2858 } 2859 switch (new_local_type_private) { 2860 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2861 new_local_type = MATSEQAIJ; 2862 bs = 1; 2863 break; 2864 case MATAIJ_PRIVATE: 2865 new_local_type = MATSEQAIJ; 2866 bs = 1; 2867 break; 2868 case MATBAIJ_PRIVATE: 2869 new_local_type = MATSEQBAIJ; 2870 break; 2871 case MATSBAIJ_PRIVATE: 2872 new_local_type = MATSEQSBAIJ; 2873 break; 2874 default: 2875 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2876 break; 2877 } 2878 } 2879 2880 /* create MATIS object */ 2881 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2882 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2883 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2884 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2885 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2886 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2887 2888 /* set values */ 2889 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2890 ptr_vals = recv_buffer_vals; 2891 ptr_idxs = recv_buffer_idxs; 2892 for (i=0;i<n_recvs;i++) { 2893 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2894 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2895 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2896 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2897 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2898 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2899 } 2900 ptr_idxs += olengths_idxs[i]; 2901 ptr_vals += olengths_vals[i]; 2902 } 2903 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2904 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2907 2908 { /* check */ 2909 Vec lvec,rvec; 2910 PetscReal infty_error; 2911 2912 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2913 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2914 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2915 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2916 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2917 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2918 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2919 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2920 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2921 } 2922 2923 /* free workspace */ 2924 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2925 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2926 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2927 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2928 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2929 if (isdense) { 2930 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2931 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2932 } else { 2933 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2934 } 2935 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2936 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2937 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2938 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2939 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2940 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2941 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2942 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2943 ierr = PetscFree(onodes);CHKERRQ(ierr); 2944 /* get back new mat */ 2945 *mat_n = new_mat; 2946 PetscFunctionReturn(0); 2947 } 2948 2949 #undef __FUNCT__ 2950 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2951 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2952 { 2953 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2954 PC_IS *pcis = (PC_IS*)pc->data; 2955 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2956 MatNullSpace CoarseNullSpace=NULL; 2957 ISLocalToGlobalMapping coarse_islg; 2958 IS coarse_is; 2959 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2960 PetscInt im_active=-1,active_procs=-1; 2961 PC pc_temp; 2962 PCType coarse_pc_type; 2963 KSPType coarse_ksp_type; 2964 PetscBool multilevel_requested,multilevel_allowed; 2965 PetscBool setsym,issym,isbddc,isnn; 2966 MatStructure matstruct; 2967 PetscErrorCode ierr; 2968 2969 PetscFunctionBegin; 2970 /* Assign global numbering to coarse dofs */ 2971 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2972 2973 /* infer some info from user */ 2974 issym = PETSC_FALSE; 2975 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2976 multilevel_allowed = PETSC_FALSE; 2977 multilevel_requested = PETSC_FALSE; 2978 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2979 if (multilevel_requested) { 2980 /* count "active processes" */ 2981 im_active = !!(pcis->n); 2982 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2983 if (active_procs/pcbddc->coarsening_ratio < 2) { 2984 multilevel_allowed = PETSC_FALSE; 2985 } else { 2986 multilevel_allowed = PETSC_TRUE; 2987 } 2988 } 2989 2990 /* set defaults for coarse KSP and PC */ 2991 if (multilevel_allowed) { 2992 if (issym) { 2993 coarse_ksp_type = KSPRICHARDSON; 2994 } else { 2995 coarse_ksp_type = KSPCHEBYSHEV; 2996 } 2997 coarse_pc_type = PCBDDC; 2998 } else { 2999 coarse_ksp_type = KSPPREONLY; 3000 coarse_pc_type = PCREDUNDANT; 3001 } 3002 3003 /* create the coarse KSP object only once with defaults */ 3004 if (!pcbddc->coarse_ksp) { 3005 char prefix[256],str_level[3]; 3006 size_t len; 3007 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3008 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3009 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3010 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3011 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3012 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3013 /* prefix */ 3014 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3015 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3016 if (!pcbddc->current_level) { 3017 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3018 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3019 } else { 3020 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3021 if (pcbddc->current_level>1) len -= 2; 3022 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3023 *(prefix+len)='\0'; 3024 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3025 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3026 } 3027 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3028 } 3029 /* allow user customization */ 3030 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3031 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3032 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3033 3034 /* get some info after set from options */ 3035 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3036 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3037 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3038 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3039 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3040 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3041 isbddc = PETSC_FALSE; 3042 } 3043 3044 /* propagate BDDC info to the next level */ 3045 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3046 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3047 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3048 3049 /* creates temporary MATIS object for coarse matrix */ 3050 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3051 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3052 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3053 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3054 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3055 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3056 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3057 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3058 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3059 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3060 3061 /* assemble coarse matrix */ 3062 if (isbddc || isnn) { 3063 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3064 } else { 3065 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3066 } 3067 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3068 3069 /* create local to global scatters for coarse problem */ 3070 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3071 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3072 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3073 3074 /* propagate symmetry info to coarse matrix */ 3075 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3076 3077 /* Compute coarse null space (special handling by BDDC only) */ 3078 if (pcbddc->NullSpace) { 3079 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3080 if (isbddc) { 3081 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3082 } else { 3083 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3084 } 3085 } 3086 3087 /* set operators */ 3088 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3089 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3090 3091 /* additional KSP customization */ 3092 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3093 if (max_it < 5) { 3094 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3095 } 3096 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3097 3098 3099 /* print some info if requested */ 3100 if (pcbddc->dbg_flag) { 3101 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3102 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3103 if (!multilevel_allowed) { 3104 if (multilevel_requested) { 3105 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3106 } else if (pcbddc->max_levels) { 3107 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3108 } 3109 } 3110 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3112 } 3113 3114 /* setup coarse ksp */ 3115 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3116 if (pcbddc->dbg_flag) { 3117 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3118 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3119 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3120 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3121 } 3122 3123 /* Check coarse problem if requested */ 3124 if (pcbddc->dbg_flag) { 3125 KSP check_ksp; 3126 KSPType check_ksp_type; 3127 PC check_pc; 3128 Vec check_vec; 3129 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3130 PetscInt its; 3131 PetscBool ispreonly,compute; 3132 3133 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3134 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3135 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3136 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3137 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3138 if (ispreonly) { 3139 check_ksp_type = KSPPREONLY; 3140 compute = PETSC_FALSE; 3141 } else { 3142 if (issym) check_ksp_type = KSPCG; 3143 else check_ksp_type = KSPGMRES; 3144 compute = PETSC_TRUE; 3145 } 3146 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3147 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3148 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3149 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3150 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3151 /* create random vec */ 3152 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3153 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3154 if (CoarseNullSpace) { 3155 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3156 } 3157 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3158 /* solve coarse problem */ 3159 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3160 if (CoarseNullSpace) { 3161 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3162 } 3163 /* check coarse problem residual error */ 3164 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3165 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3166 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3167 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3168 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3169 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3170 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3171 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3172 /* get eigenvalue estimation if preonly has not been requested */ 3173 if (!ispreonly) { 3174 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3175 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3176 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3177 } 3178 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3179 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3180 } 3181 /* free memory */ 3182 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3183 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3184 PetscFunctionReturn(0); 3185 } 3186 3187 #undef __FUNCT__ 3188 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3189 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3190 { 3191 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3192 PC_IS* pcis = (PC_IS*)pc->data; 3193 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3194 PetscInt i,j,coarse_size; 3195 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3196 PetscErrorCode ierr; 3197 3198 PetscFunctionBegin; 3199 /* get indices in local ordering for vertices and constraints */ 3200 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3201 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3202 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3203 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3204 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3205 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3206 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3207 3208 /* Compute global number of coarse dofs */ 3209 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3210 3211 /* check numbering */ 3212 if (pcbddc->dbg_flag) { 3213 PetscScalar coarsesum,*array; 3214 3215 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3216 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3217 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3218 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3219 for (i=0;i<pcbddc->local_primal_size;i++) { 3220 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3221 } 3222 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3223 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3224 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3225 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3226 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3227 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3228 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3229 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3230 for (i=0;i<pcis->n;i++) { 3231 if (array[i] == 1.0) { 3232 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3233 } 3234 } 3235 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3236 for (i=0;i<pcis->n;i++) { 3237 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3238 } 3239 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3240 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3241 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3242 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3243 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3244 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3245 if (pcbddc->dbg_flag > 1) { 3246 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3247 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3248 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3249 for (i=0;i<pcbddc->local_primal_size;i++) { 3250 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3251 } 3252 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3253 } 3254 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3255 } 3256 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3257 /* get back data */ 3258 *coarse_size_n = coarse_size; 3259 *local_primal_indices_n = local_primal_indices; 3260 PetscFunctionReturn(0); 3261 } 3262 3263