1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 for (i=0;i<pcbddc->n_ISForDofs;i++) { 62 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 63 } 64 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 65 pcbddc->n_ISForDofs = 0; 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCResetTopography" 71 PetscErrorCode PCBDDCResetTopography(PC pc) 72 { 73 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 78 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 79 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 80 PetscFunctionReturn(0); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "PCBDDCResetSolvers" 85 PetscErrorCode PCBDDCResetSolvers(PC pc) 86 { 87 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 92 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "PCBDDCCreateWorkVectors" 113 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 114 { 115 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 116 PC_IS *pcis = (PC_IS*)pc->data; 117 VecType impVecType; 118 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 119 PetscErrorCode ierr; 120 121 PetscFunctionBegin; 122 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 123 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 124 local_primal_size = n_constraints+n_vertices; 125 n_R = pcis->n-n_vertices; 126 /* local work vectors */ 127 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 128 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 129 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 130 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 131 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 132 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 133 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 134 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 135 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 136 if (n_constraints) { 137 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 138 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 139 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 140 } 141 PetscFunctionReturn(0); 142 } 143 144 #undef __FUNCT__ 145 #define __FUNCT__ "PCBDDCSetUpCorrection" 146 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 147 { 148 PetscErrorCode ierr; 149 /* pointers to pcis and pcbddc */ 150 PC_IS* pcis = (PC_IS*)pc->data; 151 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 152 /* submatrices of local problem */ 153 Mat A_RV,A_VR,A_VV; 154 /* working matrices */ 155 Mat M1,M2,M3,C_CR; 156 /* working vectors */ 157 Vec vec1_C,vec2_C,vec1_V,vec2_V; 158 /* additional working stuff */ 159 IS is_aux; 160 ISLocalToGlobalMapping BtoNmap; 161 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 162 const PetscScalar *array,*row_cmat_values; 163 const PetscInt *row_cmat_indices,*idx_R_local; 164 PetscInt *vertices,*idx_V_B,*auxindices; 165 PetscInt n_vertices,n_constraints,size_of_constraint; 166 PetscInt i,j,n_R,n_D,n_B; 167 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 168 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 169 MatType impMatType; 170 /* some shortcuts to scalars */ 171 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 172 /* for debugging purposes */ 173 PetscReal *coarsefunctions_errors,*constraints_errors; 174 175 PetscFunctionBegin; 176 /* get number of vertices and their local indices */ 177 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 178 n_constraints = pcbddc->local_primal_size-n_vertices; 179 /* Set Non-overlapping dimensions */ 180 n_B = pcis->n_B; n_D = pcis->n - n_B; 181 n_R = pcis->n-n_vertices; 182 183 /* Set types for local objects needed by BDDC precondtioner */ 184 impMatType = MATSEQDENSE; 185 186 /* Allocating some extra storage just to be safe */ 187 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 188 for (i=0;i<pcis->n;i++) auxindices[i]=i; 189 190 /* vertices in boundary numbering */ 191 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 193 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 194 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 195 if (i != n_vertices) { 196 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 197 } 198 199 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 200 if (n_constraints) { 201 /* work vectors */ 202 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 203 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 204 /* auxiliary matrices */ 205 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 206 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 207 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 208 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 209 210 /* Extract constraints on R nodes: C_{CR} */ 211 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 212 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 213 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 214 215 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 216 for (i=0;i<n_constraints;i++) { 217 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 218 /* Get row of constraint matrix in R numbering */ 219 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 220 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 221 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 222 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 223 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 224 /* Solve for row of constraint matrix in R numbering */ 225 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 226 /* Set values in local_auxmat2 */ 227 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 228 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 229 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 230 } 231 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 232 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 233 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 234 235 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 236 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 237 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 238 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 239 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 240 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 241 ierr = MatSetUp(M1);CHKERRQ(ierr); 242 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 243 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 244 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 245 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 246 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 247 ierr = MatDestroy(&M2);CHKERRQ(ierr); 248 ierr = MatDestroy(&M3);CHKERRQ(ierr); 249 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 250 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 251 } 252 253 /* Get submatrices from subdomain matrix */ 254 if (n_vertices) { 255 PetscInt ibs,mbs; 256 PetscBool issbaij; 257 Mat newmat; 258 259 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 260 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 261 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 262 if (ibs != mbs) { /* need to convert to SEQAIJ */ 263 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 265 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 266 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 267 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 268 } else { 269 /* this is safe */ 270 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 271 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 272 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 273 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 274 /* which of the two approaches is faster? */ 275 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 276 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 277 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 278 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 279 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 280 } else { 281 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 282 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 283 } 284 } 285 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 286 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 287 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 288 } 289 290 /* Matrix of coarse basis functions (local) */ 291 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 292 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 293 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 294 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 295 if (pcbddc->switch_static || pcbddc->dbg_flag) { 296 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 297 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 298 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 299 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 300 } 301 302 if (pcbddc->dbg_flag) { 303 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 304 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 305 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 306 } 307 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 308 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 309 310 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 311 312 /* vertices */ 313 for (i=0;i<n_vertices;i++) { 314 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 315 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 316 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 317 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 318 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 319 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 320 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 321 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 322 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 323 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 324 if (n_constraints) { 325 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 326 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 327 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 328 } 329 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 330 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 331 332 /* Set values in coarse basis function and subdomain part of coarse_mat */ 333 /* coarse basis functions */ 334 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 335 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 336 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 337 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 338 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 339 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 340 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 341 if (pcbddc->switch_static || pcbddc->dbg_flag) { 342 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 343 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 344 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 345 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 346 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 347 } 348 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 349 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 351 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 352 if (n_constraints) { 353 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 355 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 356 } 357 358 /* check */ 359 if (pcbddc->dbg_flag) { 360 /* assemble subdomain vector on local nodes */ 361 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 362 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 363 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 364 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 365 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 366 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 367 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 368 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 369 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 370 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 371 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 372 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 373 if (n_constraints) { 374 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 375 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 376 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 377 } 378 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 380 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 381 /* check saddle point solution */ 382 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 383 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 384 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 385 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 386 /* shift by the identity matrix */ 387 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 388 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 389 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 390 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 391 } 392 } 393 394 /* constraints */ 395 for (i=0;i<n_constraints;i++) { 396 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 397 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 399 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 400 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 401 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 402 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 403 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 404 if (n_vertices) { 405 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 406 } 407 /* Set values in coarse basis function and subdomain part of coarse_mat */ 408 /* coarse basis functions */ 409 j = i+n_vertices; /* don't touch this! */ 410 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 411 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 412 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 413 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 414 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 416 if (pcbddc->switch_static || pcbddc->dbg_flag) { 417 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 418 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 419 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 420 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 422 } 423 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 424 if (n_vertices) { 425 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 426 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 428 } 429 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 430 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 432 433 if (pcbddc->dbg_flag) { 434 /* assemble subdomain vector on nodes */ 435 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 436 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 437 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 438 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 439 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 440 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 441 /* assemble subdomain vector of lagrange multipliers */ 442 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 443 if (n_vertices) { 444 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 445 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 446 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 447 } 448 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 449 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 450 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 451 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 453 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 454 /* check saddle point solution */ 455 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 456 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 457 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 458 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 459 /* shift by the identity matrix */ 460 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 461 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 462 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 463 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 464 } 465 } 466 /* call assembling routines for local coarse basis */ 467 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 468 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 469 if (pcbddc->switch_static || pcbddc->dbg_flag) { 470 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 471 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 472 } 473 474 /* compute other basis functions for non-symmetric problems */ 475 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 476 if (!setsym || (setsym && !issym)) { 477 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 478 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 479 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 480 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 481 if (pcbddc->switch_static || pcbddc->dbg_flag) { 482 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 483 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 484 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 485 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 486 } 487 for (i=0;i<pcbddc->local_primal_size;i++) { 488 if (n_constraints) { 489 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 490 for (j=0;j<n_constraints;j++) { 491 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 492 } 493 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 494 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 495 } 496 if (i<n_vertices) { 497 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 498 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 499 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 500 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 501 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 502 if (n_constraints) { 503 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 504 } 505 } else { 506 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 507 } 508 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 509 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 510 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 511 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 512 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 513 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 514 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 515 if (i<n_vertices) { 516 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 517 } 518 if (pcbddc->switch_static || pcbddc->dbg_flag) { 519 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 520 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 521 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 522 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 523 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 524 } 525 526 if (pcbddc->dbg_flag) { 527 /* assemble subdomain vector on nodes */ 528 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 529 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 530 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 531 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 532 if (i<n_vertices) { 533 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 534 } 535 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 536 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 537 /* assemble subdomain vector of lagrange multipliers */ 538 for (j=0;j<pcbddc->local_primal_size;j++) { 539 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 540 } 541 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 542 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 543 /* check saddle point solution */ 544 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 545 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 546 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 547 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 548 /* shift by the identity matrix */ 549 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 550 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 551 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 552 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 553 } 554 } 555 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 556 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 if (pcbddc->switch_static || pcbddc->dbg_flag) { 558 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 560 } 561 } 562 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 563 /* Checking coarse_sub_mat and coarse basis functios */ 564 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 565 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 566 if (pcbddc->dbg_flag) { 567 Mat coarse_sub_mat; 568 Mat AUXMAT,TM1,TM2,TM3,TM4; 569 Mat coarse_phi_D,coarse_phi_B; 570 Mat coarse_psi_D,coarse_psi_B; 571 Mat A_II,A_BB,A_IB,A_BI; 572 MatType checkmattype=MATSEQAIJ; 573 PetscReal real_value; 574 575 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 576 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 577 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 578 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 579 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 580 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 581 if (pcbddc->coarse_psi_B) { 582 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 583 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 584 } 585 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 586 587 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 589 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 590 if (pcbddc->coarse_psi_B) { 591 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 592 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 593 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 594 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 595 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 596 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 597 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 598 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 599 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 600 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 601 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 602 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 603 } else { 604 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 605 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 606 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 607 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 608 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 609 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 610 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 611 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 612 } 613 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 614 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 615 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 616 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 617 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 618 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 623 for (i=0;i<pcbddc->local_primal_size;i++) { 624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 625 } 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 627 for (i=0;i<pcbddc->local_primal_size;i++) { 628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 629 } 630 if (pcbddc->coarse_psi_B) { 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 632 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 634 } 635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 636 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 637 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 638 } 639 } 640 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 641 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 642 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 643 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 644 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 645 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 646 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 647 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 648 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 649 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 650 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 651 if (pcbddc->coarse_psi_B) { 652 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 653 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 654 } 655 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 656 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 657 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 658 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 659 } 660 /* free memory */ 661 if (n_vertices) { 662 ierr = PetscFree(vertices);CHKERRQ(ierr); 663 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 664 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 665 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 666 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 667 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 668 } 669 if (n_constraints) { 670 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 671 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 672 ierr = MatDestroy(&M1);CHKERRQ(ierr); 673 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 674 } 675 ierr = PetscFree(auxindices);CHKERRQ(ierr); 676 /* get back data */ 677 *coarse_submat_vals_n = coarse_submat_vals; 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 683 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 684 { 685 PC_IS* pcis = (PC_IS*)(pc->data); 686 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 687 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 688 PetscBool issbaij,isbaij; 689 /* manage repeated solves */ 690 MatReuse reuse; 691 MatStructure matstruct; 692 PetscErrorCode ierr; 693 694 PetscFunctionBegin; 695 /* get mat flags */ 696 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 697 reuse = MAT_INITIAL_MATRIX; 698 if (pc->setupcalled) { 699 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 700 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 701 if (matstruct == SAME_NONZERO_PATTERN) { 702 reuse = MAT_REUSE_MATRIX; 703 } else { 704 reuse = MAT_INITIAL_MATRIX; 705 } 706 } 707 if (reuse == MAT_INITIAL_MATRIX) { 708 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 709 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 710 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 711 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 712 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 713 } 714 715 /* transform local matrices if needed */ 716 if (pcbddc->use_change_of_basis) { 717 Mat change_mat_all; 718 PetscScalar *row_cmat_values; 719 PetscInt *row_cmat_indices; 720 PetscInt *nnz,*is_indices,*temp_indices; 721 PetscInt i,j,k,n_D,n_B; 722 723 /* Get Non-overlapping dimensions */ 724 n_B = pcis->n_B; 725 n_D = pcis->n-n_B; 726 727 /* compute nonzero structure of change of basis on all local nodes */ 728 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 729 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 730 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 731 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 732 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 733 k=1; 734 for (i=0;i<n_B;i++) { 735 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 736 nnz[is_indices[i]]=j; 737 if (k < j) k = j; 738 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 739 } 740 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 741 /* assemble change of basis matrix on the whole set of local dofs */ 742 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 743 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 744 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 745 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 746 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 747 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 748 for (i=0;i<n_D;i++) { 749 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 750 } 751 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 752 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 753 for (i=0;i<n_B;i++) { 754 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 755 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 756 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 757 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 758 } 759 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 760 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 761 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 762 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 763 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 764 if (!issbaij && !isbaij) { 765 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 766 } else { 767 Mat work_mat; 768 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 769 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 770 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 771 } 772 /* 773 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 774 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 775 */ 776 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 777 ierr = PetscFree(nnz);CHKERRQ(ierr); 778 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 779 } else { 780 /* without change of basis, the local matrix is unchanged */ 781 if (!pcbddc->local_mat) { 782 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 783 pcbddc->local_mat = matis->A; 784 } 785 } 786 787 /* get submatrices */ 788 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 789 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 790 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 791 if (!issbaij) { 792 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 793 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 794 } else { 795 Mat newmat; 796 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 797 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 798 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 799 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 800 } 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 806 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 807 { 808 PC_IS* pcis = (PC_IS*)(pc->data); 809 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 810 IS is_aux1,is_aux2; 811 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 812 PetscInt n_vertices,i,j,n_R,n_D,n_B; 813 PetscInt vbs,bs; 814 PetscBT bitmask; 815 PetscErrorCode ierr; 816 817 PetscFunctionBegin; 818 /* Set Non-overlapping dimensions */ 819 n_B = pcis->n_B; n_D = pcis->n - n_B; 820 /* get vertex indices from constraint matrix */ 821 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 822 823 /* create auxiliary bitmask */ 824 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 825 for (i=0;i<n_vertices;i++) { 826 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 827 } 828 829 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 830 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 831 for (i=0, n_R=0; i<pcis->n; i++) { 832 if (!PetscBTLookup(bitmask,i)) { 833 idx_R_local[n_R] = i; 834 n_R++; 835 } 836 } 837 838 /* Block code */ 839 vbs = 1; 840 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 841 if (bs>1 && !(n_vertices%bs)) { 842 PetscBool is_blocked = PETSC_TRUE; 843 PetscInt *vary; 844 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 845 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 846 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 847 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 848 for (i=0; i<n_vertices; i++) { 849 if (vary[i]!=0 && vary[i]!=bs) { 850 is_blocked = PETSC_FALSE; 851 break; 852 } 853 } 854 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 855 vbs = bs; 856 for (i=0;i<n_R/vbs;i++) { 857 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 858 } 859 } 860 ierr = PetscFree(vary);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(vertices);CHKERRQ(ierr); 863 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 864 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 865 866 /* print some info if requested */ 867 if (pcbddc->dbg_flag) { 868 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 869 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 870 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 872 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 873 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 874 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 875 } 876 877 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 878 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 879 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 880 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 881 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 882 for (i=0; i<n_D; i++) { 883 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 884 } 885 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 886 for (i=0, j=0; i<n_R; i++) { 887 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 888 aux_array1[j++] = i; 889 } 890 } 891 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 892 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 893 for (i=0, j=0; i<n_B; i++) { 894 if (!PetscBTLookup(bitmask,is_indices[i])) { 895 aux_array2[j++] = i; 896 } 897 } 898 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 899 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 900 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 901 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 902 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 903 904 if (pcbddc->switch_static || pcbddc->dbg_flag) { 905 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 906 for (i=0, j=0; i<n_R; i++) { 907 if (PetscBTLookup(bitmask,idx_R_local[i])) { 908 aux_array1[j++] = i; 909 } 910 } 911 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 912 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 913 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 914 } 915 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 916 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 923 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 924 { 925 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 926 PC_IS *pcis = (PC_IS*)pc->data; 927 PC pc_temp; 928 Mat A_RR; 929 Vec vec1,vec2,vec3; 930 MatStructure matstruct; 931 PetscScalar m_one = -1.0; 932 PetscReal value; 933 PetscInt n_D,n_R,ibs,mbs; 934 PetscBool use_exact,use_exact_reduced,issbaij; 935 PetscErrorCode ierr; 936 /* prefixes stuff */ 937 char dir_prefix[256],neu_prefix[256],str_level[3]; 938 size_t len; 939 940 PetscFunctionBegin; 941 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 942 943 /* compute prefixes */ 944 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 945 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 946 if (!pcbddc->current_level) { 947 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 948 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 949 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 950 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 951 } else { 952 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 953 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 954 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 955 len -= 15; /* remove "pc_bddc_coarse_" */ 956 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 957 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 958 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 959 *(dir_prefix+len)='\0'; 960 *(neu_prefix+len)='\0'; 961 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 962 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 963 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 964 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 965 } 966 967 /* DIRICHLET PROBLEM */ 968 /* Matrix for Dirichlet problem is pcis->A_II */ 969 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 970 if (!pcbddc->ksp_D) { /* create object if not yet build */ 971 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 972 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 973 /* default */ 974 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 975 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 976 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 977 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 978 if (issbaij) { 979 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 980 } else { 981 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 982 } 983 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 984 } 985 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 986 /* Allow user's customization */ 987 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 988 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 989 if (!n_D) { 990 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 991 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 992 } 993 /* Set Up KSP for Dirichlet problem of BDDC */ 994 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 995 /* set ksp_D into pcis data */ 996 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 997 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 998 pcis->ksp_D = pcbddc->ksp_D; 999 1000 /* NEUMANN PROBLEM */ 1001 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1002 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1003 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1004 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1005 if (ibs != mbs) { 1006 Mat newmat; 1007 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1008 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1009 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1010 } else { 1011 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1012 } 1013 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1014 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1015 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1016 /* default */ 1017 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1018 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1019 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1020 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1021 if (issbaij) { 1022 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1023 } else { 1024 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1025 } 1026 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1027 } 1028 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1029 /* Allow user's customization */ 1030 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1031 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1032 if (!n_R) { 1033 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1034 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1035 } 1036 /* Set Up KSP for Neumann problem of BDDC */ 1037 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1038 1039 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1040 1041 /* Dirichlet */ 1042 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1043 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1044 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1045 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1046 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1047 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1048 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1049 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1050 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1051 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1052 /* need to be adapted? */ 1053 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1054 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1055 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1056 /* print info */ 1057 if (pcbddc->dbg_flag) { 1058 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1059 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1060 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1061 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1063 } 1064 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1065 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1066 } 1067 1068 /* Neumann */ 1069 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1070 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1071 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1072 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1073 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1074 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1075 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1076 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1077 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1078 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1079 /* need to be adapted? */ 1080 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1081 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1082 /* print info */ 1083 if (pcbddc->dbg_flag) { 1084 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1086 } 1087 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1088 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1089 } 1090 /* free Neumann problem's matrix */ 1091 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1097 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1098 { 1099 PetscErrorCode ierr; 1100 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1101 1102 PetscFunctionBegin; 1103 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1104 if (pcbddc->local_auxmat1) { 1105 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1106 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1107 } 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNCT__ 1112 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1113 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1114 { 1115 PetscErrorCode ierr; 1116 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1117 PC_IS* pcis = (PC_IS*) (pc->data); 1118 const PetscScalar zero = 0.0; 1119 1120 PetscFunctionBegin; 1121 /* Application of PHI^T (or PSI^T) */ 1122 if (pcbddc->coarse_psi_B) { 1123 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1124 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1125 } else { 1126 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1127 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1128 } 1129 /* Scatter data of coarse_rhs */ 1130 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1131 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1132 1133 /* Local solution on R nodes */ 1134 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1135 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1136 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1137 if (pcbddc->switch_static) { 1138 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1139 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1140 } 1141 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1142 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1143 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1145 if (pcbddc->switch_static) { 1146 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1147 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1148 } 1149 1150 /* Coarse solution */ 1151 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1152 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1153 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1154 } 1155 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1156 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 1158 /* Sum contributions from two levels */ 1159 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1160 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1166 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1167 { 1168 PetscErrorCode ierr; 1169 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1170 1171 PetscFunctionBegin; 1172 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1178 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1179 { 1180 PetscErrorCode ierr; 1181 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1182 1183 PetscFunctionBegin; 1184 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 /* uncomment for testing purposes */ 1189 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1192 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1193 { 1194 PetscErrorCode ierr; 1195 PC_IS* pcis = (PC_IS*)(pc->data); 1196 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1197 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1198 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1199 MatType impMatType=MATSEQAIJ; 1200 /* one and zero */ 1201 PetscScalar one=1.0,zero=0.0; 1202 /* space to store constraints and their local indices */ 1203 PetscScalar *temp_quadrature_constraint; 1204 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1205 /* iterators */ 1206 PetscInt i,j,k,total_counts,temp_start_ptr; 1207 /* stuff to store connected components stored in pcbddc->mat_graph */ 1208 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1209 PetscInt n_ISForFaces,n_ISForEdges; 1210 /* near null space stuff */ 1211 MatNullSpace nearnullsp; 1212 const Vec *nearnullvecs; 1213 Vec *localnearnullsp; 1214 PetscBool nnsp_has_cnst; 1215 PetscInt nnsp_size; 1216 PetscScalar *array; 1217 /* BLAS integers */ 1218 PetscBLASInt lwork,lierr; 1219 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1220 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1221 /* LAPACK working arrays for SVD or POD */ 1222 PetscBool skip_lapack; 1223 PetscScalar *work; 1224 PetscReal *singular_vals; 1225 #if defined(PETSC_USE_COMPLEX) 1226 PetscReal *rwork; 1227 #endif 1228 #if defined(PETSC_MISSING_LAPACK_GESVD) 1229 PetscBLASInt Blas_one_2=1; 1230 PetscScalar *temp_basis,*correlation_mat; 1231 #else 1232 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1233 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1234 #endif 1235 /* change of basis */ 1236 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1237 PetscBool boolforchange,qr_needed; 1238 PetscBT touched,change_basis,qr_needed_idx; 1239 /* auxiliary stuff */ 1240 PetscInt *nnz,*is_indices,*local_to_B; 1241 /* some quantities */ 1242 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1243 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1244 1245 1246 PetscFunctionBegin; 1247 /* Get index sets for faces, edges and vertices from graph */ 1248 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1249 pcbddc->use_vertices = PETSC_TRUE; 1250 } 1251 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1252 /* HACK: provide functions to set change of basis */ 1253 if (!ISForVertices && pcbddc->NullSpace) { 1254 pcbddc->use_change_of_basis = PETSC_TRUE; 1255 pcbddc->use_change_on_faces = PETSC_FALSE; 1256 } 1257 /* print some info */ 1258 if (pcbddc->dbg_flag) { 1259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1260 i = 0; 1261 if (ISForVertices) { 1262 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1263 } 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1267 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1268 } 1269 /* check if near null space is attached to global mat */ 1270 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1271 if (nearnullsp) { 1272 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1273 } else { /* if near null space is not provided BDDC uses constants by default */ 1274 nnsp_size = 0; 1275 nnsp_has_cnst = PETSC_TRUE; 1276 } 1277 /* get max number of constraints on a single cc */ 1278 max_constraints = nnsp_size; 1279 if (nnsp_has_cnst) max_constraints++; 1280 1281 /* 1282 Evaluate maximum storage size needed by the procedure 1283 - temp_indices will contain start index of each constraint stored as follows 1284 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1285 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1286 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1287 */ 1288 total_counts = n_ISForFaces+n_ISForEdges; 1289 total_counts *= max_constraints; 1290 n_vertices = 0; 1291 if (ISForVertices) { 1292 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1293 } 1294 total_counts += n_vertices; 1295 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1296 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1297 total_counts = 0; 1298 max_size_of_constraint = 0; 1299 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1300 if (i<n_ISForEdges) { 1301 used_IS = &ISForEdges[i]; 1302 } else { 1303 used_IS = &ISForFaces[i-n_ISForEdges]; 1304 } 1305 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1306 total_counts += j; 1307 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1308 } 1309 total_counts *= max_constraints; 1310 total_counts += n_vertices; 1311 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1312 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1313 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1314 /* local to boundary numbering */ 1315 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1316 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1317 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1318 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1319 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1320 /* get local part of global near null space vectors */ 1321 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1322 for (k=0;k<nnsp_size;k++) { 1323 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1324 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1325 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1326 } 1327 1328 /* whether or not to skip lapack calls */ 1329 skip_lapack = PETSC_TRUE; 1330 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1331 1332 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1333 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1334 PetscScalar temp_work; 1335 #if defined(PETSC_MISSING_LAPACK_GESVD) 1336 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1337 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1338 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1339 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1340 #if defined(PETSC_USE_COMPLEX) 1341 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1342 #endif 1343 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1344 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1345 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1346 lwork = -1; 1347 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1348 #if !defined(PETSC_USE_COMPLEX) 1349 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1350 #else 1351 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1352 #endif 1353 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1354 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1355 #else /* on missing GESVD */ 1356 /* SVD */ 1357 PetscInt max_n,min_n; 1358 max_n = max_size_of_constraint; 1359 min_n = max_constraints; 1360 if (max_size_of_constraint < max_constraints) { 1361 min_n = max_size_of_constraint; 1362 max_n = max_constraints; 1363 } 1364 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1365 #if defined(PETSC_USE_COMPLEX) 1366 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1367 #endif 1368 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1369 lwork = -1; 1370 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1371 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1372 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1373 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1374 #if !defined(PETSC_USE_COMPLEX) 1375 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1376 #else 1377 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1378 #endif 1379 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1380 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1381 #endif /* on missing GESVD */ 1382 /* Allocate optimal workspace */ 1383 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1384 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1385 } 1386 /* Now we can loop on constraining sets */ 1387 total_counts = 0; 1388 temp_indices[0] = 0; 1389 /* vertices */ 1390 if (ISForVertices) { 1391 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1392 if (nnsp_has_cnst) { /* consider all vertices */ 1393 for (i=0;i<n_vertices;i++) { 1394 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1395 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1396 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1397 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1398 total_counts++; 1399 } 1400 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1401 PetscBool used_vertex; 1402 for (i=0;i<n_vertices;i++) { 1403 used_vertex = PETSC_FALSE; 1404 k = 0; 1405 while (!used_vertex && k<nnsp_size) { 1406 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1407 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1408 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1409 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1410 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1411 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1412 total_counts++; 1413 used_vertex = PETSC_TRUE; 1414 } 1415 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1416 k++; 1417 } 1418 } 1419 } 1420 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1421 n_vertices = total_counts; 1422 } 1423 1424 /* edges and faces */ 1425 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1426 if (i<n_ISForEdges) { 1427 used_IS = &ISForEdges[i]; 1428 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1429 } else { 1430 used_IS = &ISForFaces[i-n_ISForEdges]; 1431 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1432 } 1433 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1434 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1435 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1436 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1437 /* change of basis should not be performed on local periodic nodes */ 1438 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1439 if (nnsp_has_cnst) { 1440 PetscScalar quad_value; 1441 temp_constraints++; 1442 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1443 for (j=0;j<size_of_constraint;j++) { 1444 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1445 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1446 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1447 } 1448 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1449 total_counts++; 1450 } 1451 for (k=0;k<nnsp_size;k++) { 1452 PetscReal real_value; 1453 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1454 for (j=0;j<size_of_constraint;j++) { 1455 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1456 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1457 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1458 } 1459 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1460 /* check if array is null on the connected component */ 1461 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1462 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1463 if (real_value > 0.0) { /* keep indices and values */ 1464 temp_constraints++; 1465 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1466 total_counts++; 1467 } 1468 } 1469 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1470 valid_constraints = temp_constraints; 1471 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1472 if (!pcbddc->use_nnsp_true && temp_constraints) { 1473 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1474 1475 #if defined(PETSC_MISSING_LAPACK_GESVD) 1476 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1477 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1478 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1479 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1480 from that computed using LAPACKgesvd 1481 -> This is due to a different computation of eigenvectors in LAPACKheev 1482 -> The quality of the POD-computed basis will be the same */ 1483 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1484 /* Store upper triangular part of correlation matrix */ 1485 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1486 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1487 for (j=0;j<temp_constraints;j++) { 1488 for (k=0;k<j+1;k++) { 1489 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1490 } 1491 } 1492 /* compute eigenvalues and eigenvectors of correlation matrix */ 1493 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1494 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1495 #if !defined(PETSC_USE_COMPLEX) 1496 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1497 #else 1498 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1499 #endif 1500 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1501 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1502 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1503 j=0; 1504 while (j < temp_constraints && singular_vals[j] < tol) j++; 1505 total_counts=total_counts-j; 1506 valid_constraints = temp_constraints-j; 1507 /* scale and copy POD basis into used quadrature memory */ 1508 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1509 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1510 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1511 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1512 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1514 if (j<temp_constraints) { 1515 PetscInt ii; 1516 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1517 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1518 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1519 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1520 for (k=0;k<temp_constraints-j;k++) { 1521 for (ii=0;ii<size_of_constraint;ii++) { 1522 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1523 } 1524 } 1525 } 1526 #else /* on missing GESVD */ 1527 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1528 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1529 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1530 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1531 #if !defined(PETSC_USE_COMPLEX) 1532 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1533 #else 1534 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1535 #endif 1536 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1537 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1538 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1539 k = temp_constraints; 1540 if (k > size_of_constraint) k = size_of_constraint; 1541 j = 0; 1542 while (j < k && singular_vals[k-j-1] < tol) j++; 1543 total_counts = total_counts-temp_constraints+k-j; 1544 valid_constraints = k-j; 1545 #endif /* on missing GESVD */ 1546 } 1547 /* setting change_of_basis flag is safe now */ 1548 if (boolforchange) { 1549 for (j=0;j<valid_constraints;j++) { 1550 PetscBTSet(change_basis,total_counts-j-1); 1551 } 1552 } 1553 } 1554 /* free index sets of faces, edges and vertices */ 1555 for (i=0;i<n_ISForFaces;i++) { 1556 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1557 } 1558 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1559 for (i=0;i<n_ISForEdges;i++) { 1560 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1561 } 1562 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1563 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1564 1565 /* free workspace */ 1566 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1567 ierr = PetscFree(work);CHKERRQ(ierr); 1568 #if defined(PETSC_USE_COMPLEX) 1569 ierr = PetscFree(rwork);CHKERRQ(ierr); 1570 #endif 1571 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1572 #if defined(PETSC_MISSING_LAPACK_GESVD) 1573 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1574 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1575 #endif 1576 } 1577 for (k=0;k<nnsp_size;k++) { 1578 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1579 } 1580 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1581 1582 /* set quantities in pcbddc data structure */ 1583 /* n_vertices defines the number of subdomain corners in the primal space */ 1584 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1585 pcbddc->local_primal_size = total_counts; 1586 pcbddc->n_vertices = n_vertices; 1587 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1588 1589 /* Create constraint matrix */ 1590 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1591 /* so we need to set it up properly either with or without change of basis */ 1592 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1593 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1594 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1595 /* array to compute a local numbering of constraints : vertices first then constraints */ 1596 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1597 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1598 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1599 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1600 /* auxiliary stuff for basis change */ 1601 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1602 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1603 1604 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1605 total_primal_vertices=0; 1606 for (i=0;i<pcbddc->local_primal_size;i++) { 1607 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1608 if (size_of_constraint == 1) { 1609 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1610 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1611 aux_primal_minloc[total_primal_vertices]=0; 1612 total_primal_vertices++; 1613 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1614 PetscInt min_loc,min_index; 1615 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1616 /* find first untouched local node */ 1617 k = 0; 1618 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1619 min_index = global_indices[k]; 1620 min_loc = k; 1621 /* search the minimum among global nodes already untouched on the cc */ 1622 for (k=1;k<size_of_constraint;k++) { 1623 /* there can be more than one constraint on a single connected component */ 1624 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1625 min_index = global_indices[k]; 1626 min_loc = k; 1627 } 1628 } 1629 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1630 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1631 aux_primal_minloc[total_primal_vertices]=min_loc; 1632 total_primal_vertices++; 1633 } 1634 } 1635 /* determine if a QR strategy is needed for change of basis */ 1636 qr_needed = PETSC_FALSE; 1637 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1638 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1639 if (PetscBTLookup(change_basis,i)) { 1640 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1641 j = 0; 1642 for (k=0;k<size_of_constraint;k++) { 1643 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1644 j++; 1645 } 1646 } 1647 /* found more than one primal dof on the cc */ 1648 if (j > 1) { 1649 PetscBTSet(qr_needed_idx,i); 1650 qr_needed = PETSC_TRUE; 1651 } 1652 } 1653 } 1654 /* free workspace */ 1655 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1656 1657 /* permute indices in order to have a sorted set of vertices */ 1658 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1659 1660 /* nonzero structure of constraint matrix */ 1661 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1662 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1663 j=total_primal_vertices; 1664 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1665 if (!PetscBTLookup(change_basis,i)) { 1666 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1667 j++; 1668 } 1669 } 1670 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1671 ierr = PetscFree(nnz);CHKERRQ(ierr); 1672 /* set values in constraint matrix */ 1673 for (i=0;i<total_primal_vertices;i++) { 1674 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1675 } 1676 total_counts = total_primal_vertices; 1677 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1678 if (!PetscBTLookup(change_basis,i)) { 1679 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1680 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1681 total_counts++; 1682 } 1683 } 1684 /* assembling */ 1685 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 /* 1688 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1689 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1690 */ 1691 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1692 if (pcbddc->use_change_of_basis) { 1693 /* dual and primal dofs on a single cc */ 1694 PetscInt dual_dofs,primal_dofs; 1695 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1696 PetscInt primal_counter; 1697 /* working stuff for GEQRF */ 1698 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1699 PetscBLASInt lqr_work; 1700 /* working stuff for UNGQR */ 1701 PetscScalar *gqr_work,lgqr_work_t; 1702 PetscBLASInt lgqr_work; 1703 /* working stuff for TRTRS */ 1704 PetscScalar *trs_rhs; 1705 PetscBLASInt Blas_NRHS; 1706 /* pointers for values insertion into change of basis matrix */ 1707 PetscInt *start_rows,*start_cols; 1708 PetscScalar *start_vals; 1709 /* working stuff for values insertion */ 1710 PetscBT is_primal; 1711 1712 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1713 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1714 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1715 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1716 /* work arrays */ 1717 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1718 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1719 /* nonzeros per row */ 1720 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1721 if (PetscBTLookup(change_basis,i)) { 1722 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1723 if (PetscBTLookup(qr_needed_idx,i)) { 1724 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1725 } else { 1726 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1727 /* get local primal index on the cc */ 1728 j = 0; 1729 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1730 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1731 } 1732 } 1733 } 1734 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1735 ierr = PetscFree(nnz);CHKERRQ(ierr); 1736 /* Set initial identity in the matrix */ 1737 for (i=0;i<pcis->n_B;i++) { 1738 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1739 } 1740 1741 if (pcbddc->dbg_flag) { 1742 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1744 } 1745 1746 1747 /* Now we loop on the constraints which need a change of basis */ 1748 /* 1749 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1750 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1751 1752 Basic blocks of change of basis matrix T computed 1753 1754 - Using the following block transformation if there is only a primal dof on the cc 1755 (in the example, primal dof is the last one of the edge in LOCAL ordering 1756 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1757 | 1 0 ... 0 1 | 1758 | 0 1 ... 0 1 | 1759 | ... | 1760 | 0 ... 1 1 | 1761 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1762 1763 - via QR decomposition of constraints otherwise 1764 */ 1765 if (qr_needed) { 1766 /* space to store Q */ 1767 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1768 /* first we issue queries for optimal work */ 1769 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1770 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1771 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1772 lqr_work = -1; 1773 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1774 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1775 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1776 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1777 lgqr_work = -1; 1778 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1779 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1780 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1781 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1782 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1783 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1784 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1785 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1786 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1787 /* array to store scaling factors for reflectors */ 1788 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1789 /* array to store rhs and solution of triangular solver */ 1790 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1791 /* allocating workspace for check */ 1792 if (pcbddc->dbg_flag) { 1793 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1794 } 1795 } 1796 /* array to store whether a node is primal or not */ 1797 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1798 for (i=0;i<total_primal_vertices;i++) { 1799 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1800 } 1801 1802 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1803 /* -> using implicit ordering contained in temp_indices data */ 1804 total_counts = pcbddc->n_vertices; 1805 primal_counter = total_counts; 1806 while (total_counts<pcbddc->local_primal_size) { 1807 primal_dofs = 1; 1808 if (PetscBTLookup(change_basis,total_counts)) { 1809 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1810 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1811 primal_dofs++; 1812 } 1813 /* get constraint info */ 1814 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1815 dual_dofs = size_of_constraint-primal_dofs; 1816 1817 if (pcbddc->dbg_flag) { 1818 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1819 } 1820 1821 if (primal_dofs > 1) { /* QR */ 1822 1823 /* copy quadrature constraints for change of basis check */ 1824 if (pcbddc->dbg_flag) { 1825 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1826 } 1827 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1828 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1829 1830 /* compute QR decomposition of constraints */ 1831 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1832 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1833 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1834 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1835 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1836 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1837 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1838 1839 /* explictly compute R^-T */ 1840 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1841 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1842 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1843 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1844 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1845 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1846 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1847 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1848 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1849 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1850 1851 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1853 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1854 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1855 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1856 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1857 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1858 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1859 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1860 1861 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1862 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1863 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1864 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1865 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1867 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1868 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1869 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1870 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1871 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1872 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1873 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1874 1875 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1876 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1877 /* insert cols for primal dofs */ 1878 for (j=0;j<primal_dofs;j++) { 1879 start_vals = &qr_basis[j*size_of_constraint]; 1880 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1881 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1882 } 1883 /* insert cols for dual dofs */ 1884 for (j=0,k=0;j<dual_dofs;k++) { 1885 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1886 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1887 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1888 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1889 j++; 1890 } 1891 } 1892 1893 /* check change of basis */ 1894 if (pcbddc->dbg_flag) { 1895 PetscInt ii,jj; 1896 PetscBool valid_qr=PETSC_TRUE; 1897 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1898 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1899 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1901 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1902 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1903 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1904 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1905 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1906 for (jj=0;jj<size_of_constraint;jj++) { 1907 for (ii=0;ii<primal_dofs;ii++) { 1908 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1909 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1910 } 1911 } 1912 if (!valid_qr) { 1913 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1914 for (jj=0;jj<size_of_constraint;jj++) { 1915 for (ii=0;ii<primal_dofs;ii++) { 1916 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1917 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1918 } 1919 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1920 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1921 } 1922 } 1923 } 1924 } else { 1925 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1926 } 1927 } 1928 } else { /* simple transformation block */ 1929 PetscInt row,col; 1930 PetscScalar val; 1931 for (j=0;j<size_of_constraint;j++) { 1932 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1933 if (!PetscBTLookup(is_primal,row)) { 1934 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1935 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1936 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1937 } else { 1938 for (k=0;k<size_of_constraint;k++) { 1939 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1940 if (row != col) { 1941 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1942 } else { 1943 val = 1.0; 1944 } 1945 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1946 } 1947 } 1948 } 1949 } 1950 /* increment primal counter */ 1951 primal_counter += primal_dofs; 1952 } else { 1953 if (pcbddc->dbg_flag) { 1954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1955 } 1956 } 1957 /* increment constraint counter total_counts */ 1958 total_counts += primal_dofs; 1959 } 1960 1961 /* free workspace */ 1962 if (qr_needed) { 1963 if (pcbddc->dbg_flag) { 1964 ierr = PetscFree(work);CHKERRQ(ierr); 1965 } 1966 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1967 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1968 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1969 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1970 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1971 } 1972 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1973 /* assembling */ 1974 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1975 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1976 /* 1977 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1978 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1979 */ 1980 } 1981 1982 /* flush dbg viewer */ 1983 if (pcbddc->dbg_flag) { 1984 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1985 } 1986 1987 /* free workspace */ 1988 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1989 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 1990 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1991 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1992 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1993 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1994 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1995 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1996 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1997 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2003 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2004 { 2005 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2006 PC_IS *pcis = (PC_IS*)pc->data; 2007 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2008 PetscInt bs,ierr,i,vertex_size; 2009 PetscViewer viewer=pcbddc->dbg_viewer; 2010 2011 PetscFunctionBegin; 2012 /* Init local Graph struct */ 2013 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2014 2015 /* Check validity of the csr graph passed in by the user */ 2016 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2017 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2018 } 2019 2020 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2021 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2022 Mat mat_adj; 2023 const PetscInt *xadj,*adjncy; 2024 PetscBool flg_row=PETSC_TRUE; 2025 2026 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2027 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2028 if (!flg_row) { 2029 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2030 } 2031 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2032 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2033 if (!flg_row) { 2034 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2035 } 2036 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2037 } 2038 2039 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2040 vertex_size = 1; 2041 if (!pcbddc->user_provided_isfordofs) { 2042 if (!pcbddc->n_ISForDofs) { 2043 IS *custom_ISForDofs; 2044 2045 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2046 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2047 for (i=0;i<bs;i++) { 2048 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2049 } 2050 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2051 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2052 /* remove my references to IS objects */ 2053 for (i=0;i<bs;i++) { 2054 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2055 } 2056 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2057 } 2058 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2059 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2060 } 2061 2062 /* Setup of Graph */ 2063 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2064 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2065 } 2066 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2067 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2068 } 2069 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2070 2071 /* Graph's connected components analysis */ 2072 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2073 2074 /* print some info to stdout */ 2075 if (pcbddc->dbg_flag) { 2076 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2077 } 2078 PetscFunctionReturn(0); 2079 } 2080 2081 #undef __FUNCT__ 2082 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2083 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2084 { 2085 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2086 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2087 PetscErrorCode ierr; 2088 2089 PetscFunctionBegin; 2090 n = 0; 2091 vertices = 0; 2092 if (pcbddc->ConstraintMatrix) { 2093 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2094 for (i=0;i<local_primal_size;i++) { 2095 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2096 if (size_of_constraint == 1) n++; 2097 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2098 } 2099 if (vertices_idx) { 2100 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2101 n = 0; 2102 for (i=0;i<local_primal_size;i++) { 2103 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2104 if (size_of_constraint == 1) { 2105 vertices[n++]=row_cmat_indices[0]; 2106 } 2107 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2108 } 2109 } 2110 } 2111 *n_vertices = n; 2112 if (vertices_idx) *vertices_idx = vertices; 2113 PetscFunctionReturn(0); 2114 } 2115 2116 #undef __FUNCT__ 2117 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2118 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2119 { 2120 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2121 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2122 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2123 PetscBT touched; 2124 PetscErrorCode ierr; 2125 2126 /* This function assumes that the number of local constraints per connected component 2127 is not greater than the number of nodes defined for the connected component 2128 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2129 PetscFunctionBegin; 2130 n = 0; 2131 constraints_index = 0; 2132 if (pcbddc->ConstraintMatrix) { 2133 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2134 max_size_of_constraint = 0; 2135 for (i=0;i<local_primal_size;i++) { 2136 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2137 if (size_of_constraint > 1) { 2138 n++; 2139 } 2140 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2141 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2142 } 2143 if (constraints_idx) { 2144 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2145 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2146 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2147 n = 0; 2148 for (i=0;i<local_primal_size;i++) { 2149 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2150 if (size_of_constraint > 1) { 2151 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2152 /* find first untouched local node */ 2153 j = 0; 2154 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2155 min_index = row_cmat_global_indices[j]; 2156 min_loc = j; 2157 /* search the minimum among nodes not yet touched on the connected component 2158 since there can be more than one constraint on a single cc */ 2159 for (j=1;j<size_of_constraint;j++) { 2160 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2161 min_index = row_cmat_global_indices[j]; 2162 min_loc = j; 2163 } 2164 } 2165 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2166 constraints_index[n++] = row_cmat_indices[min_loc]; 2167 } 2168 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2169 } 2170 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2171 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2172 } 2173 } 2174 *n_constraints = n; 2175 if (constraints_idx) *constraints_idx = constraints_index; 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* the next two functions has been adapted from pcis.c */ 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "PCBDDCApplySchur" 2182 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2183 { 2184 PetscErrorCode ierr; 2185 PC_IS *pcis = (PC_IS*)(pc->data); 2186 2187 PetscFunctionBegin; 2188 if (!vec2_B) { vec2_B = v; } 2189 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2190 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2191 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2192 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2193 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2194 PetscFunctionReturn(0); 2195 } 2196 2197 #undef __FUNCT__ 2198 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2199 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2200 { 2201 PetscErrorCode ierr; 2202 PC_IS *pcis = (PC_IS*)(pc->data); 2203 2204 PetscFunctionBegin; 2205 if (!vec2_B) { vec2_B = v; } 2206 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2207 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2208 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2209 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2210 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 #undef __FUNCT__ 2215 #define __FUNCT__ "PCBDDCSubsetNumbering" 2216 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2217 { 2218 Vec local_vec,global_vec; 2219 IS seqis,paris; 2220 VecScatter scatter_ctx; 2221 PetscScalar *array; 2222 PetscInt *temp_global_dofs; 2223 PetscScalar globalsum; 2224 PetscInt i,j,s; 2225 PetscInt nlocals,first_index,old_index,max_local; 2226 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2227 PetscMPIInt *dof_sizes,*dof_displs; 2228 PetscBool first_found; 2229 PetscErrorCode ierr; 2230 2231 PetscFunctionBegin; 2232 /* mpi buffers */ 2233 MPI_Comm_size(comm,&size_prec_comm); 2234 MPI_Comm_rank(comm,&rank_prec_comm); 2235 j = ( !rank_prec_comm ? size_prec_comm : 0); 2236 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2237 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2238 /* get maximum size of subset */ 2239 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2240 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2241 max_local = 0; 2242 if (n_local_dofs) { 2243 max_local = temp_global_dofs[0]; 2244 for (i=1;i<n_local_dofs;i++) { 2245 if (max_local < temp_global_dofs[i] ) { 2246 max_local = temp_global_dofs[i]; 2247 } 2248 } 2249 } 2250 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2251 max_global++; 2252 max_local = 0; 2253 if (n_local_dofs) { 2254 max_local = local_dofs[0]; 2255 for (i=1;i<n_local_dofs;i++) { 2256 if (max_local < local_dofs[i] ) { 2257 max_local = local_dofs[i]; 2258 } 2259 } 2260 } 2261 max_local++; 2262 /* allocate workspace */ 2263 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2264 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2265 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2266 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2267 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2268 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2269 /* create scatter */ 2270 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2271 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2272 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2273 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2274 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2275 /* init array */ 2276 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2277 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2278 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2279 if (local_dofs_mult) { 2280 for (i=0;i<n_local_dofs;i++) { 2281 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2282 } 2283 } else { 2284 for (i=0;i<n_local_dofs;i++) { 2285 array[local_dofs[i]]=1.0; 2286 } 2287 } 2288 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2289 /* scatter into global vec and get total number of global dofs */ 2290 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2291 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2292 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2293 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2294 /* Fill global_vec with cumulative function for global numbering */ 2295 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2296 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2297 nlocals = 0; 2298 first_index = -1; 2299 first_found = PETSC_FALSE; 2300 for (i=0;i<s;i++) { 2301 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2302 first_found = PETSC_TRUE; 2303 first_index = i; 2304 } 2305 nlocals += (PetscInt)PetscRealPart(array[i]); 2306 } 2307 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2308 if (!rank_prec_comm) { 2309 dof_displs[0]=0; 2310 for (i=1;i<size_prec_comm;i++) { 2311 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2312 } 2313 } 2314 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2315 if (first_found) { 2316 array[first_index] += (PetscScalar)nlocals; 2317 old_index = first_index; 2318 for (i=first_index+1;i<s;i++) { 2319 if (PetscRealPart(array[i]) > 0.0) { 2320 array[i] += array[old_index]; 2321 old_index = i; 2322 } 2323 } 2324 } 2325 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2326 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2327 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2328 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2329 /* get global ordering of local dofs */ 2330 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2331 if (local_dofs_mult) { 2332 for (i=0;i<n_local_dofs;i++) { 2333 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2334 } 2335 } else { 2336 for (i=0;i<n_local_dofs;i++) { 2337 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2338 } 2339 } 2340 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2341 /* free workspace */ 2342 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2343 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2344 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2345 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2346 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2347 /* return pointer to global ordering of local dofs */ 2348 *global_numbering_subset = temp_global_dofs; 2349 PetscFunctionReturn(0); 2350 } 2351 2352 #undef __FUNCT__ 2353 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2354 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2355 { 2356 PetscInt i,j; 2357 PetscScalar *alphas; 2358 PetscErrorCode ierr; 2359 2360 PetscFunctionBegin; 2361 /* this implements stabilized Gram-Schmidt */ 2362 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2363 for (i=0;i<n;i++) { 2364 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2365 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2366 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2367 } 2368 ierr = PetscFree(alphas);CHKERRQ(ierr); 2369 PetscFunctionReturn(0); 2370 } 2371 2372 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2373 #undef __FUNCT__ 2374 #define __FUNCT__ "MatConvert_IS_AIJ" 2375 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2376 { 2377 Mat new_mat; 2378 Mat_IS *matis = (Mat_IS*)(mat->data); 2379 /* info on mat */ 2380 PetscInt bs,rows,cols; 2381 PetscInt lrows,lcols; 2382 PetscInt local_rows,local_cols; 2383 PetscMPIInt nsubdomains; 2384 /* preallocation */ 2385 Vec vec_dnz,vec_onz; 2386 PetscScalar *my_dnz,*my_onz,*array; 2387 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2388 PetscInt index_row,index_col,owner; 2389 PetscInt *local_indices,*global_indices; 2390 /* work */ 2391 PetscInt i,j; 2392 PetscErrorCode ierr; 2393 2394 PetscFunctionBegin; 2395 /* CHECKS */ 2396 /* get info from mat */ 2397 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2398 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2399 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2400 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2401 2402 /* MAT_INITIAL_MATRIX */ 2403 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2404 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2405 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2406 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2407 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2408 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2409 2410 /* 2411 preallocation 2412 */ 2413 2414 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2415 /* 2416 Some vectors are needed to sum up properly on shared interface dofs. 2417 Note that preallocation is not exact, since it overestimates nonzeros 2418 */ 2419 /* 2420 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2421 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2422 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2423 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2424 */ 2425 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2426 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2427 /* All processes need to compute entire row ownership */ 2428 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2429 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2430 for (i=0;i<nsubdomains;i++) { 2431 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2432 row_ownership[j]=i; 2433 } 2434 } 2435 /* map indices of local mat to global values */ 2436 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2437 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2438 for (i=0;i<local_rows;i++) local_indices[i]=i; 2439 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2440 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2441 2442 /* 2443 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2444 then, they will be summed up properly. This way, preallocation is always sufficient 2445 */ 2446 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2447 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2448 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2449 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2450 for (i=0;i<local_rows;i++) { 2451 index_row = global_indices[i]; 2452 for (j=i;j<local_rows;j++) { 2453 owner = row_ownership[index_row]; 2454 index_col = global_indices[j]; 2455 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2456 my_dnz[i] += 1.0; 2457 } else { /* offdiag block */ 2458 my_onz[i] += 1.0; 2459 } 2460 /* same as before, interchanging rows and cols */ 2461 if (i != j) { 2462 owner = row_ownership[index_col]; 2463 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2464 my_dnz[j] += 1.0; 2465 } else { 2466 my_onz[j] += 1.0; 2467 } 2468 } 2469 } 2470 } 2471 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2472 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2473 if (local_rows) { /* multilevel guard */ 2474 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2475 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2476 } 2477 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2478 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2479 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2480 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2481 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2482 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2483 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2484 2485 /* set computed preallocation in dnz and onz */ 2486 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2487 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2488 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2489 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2490 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2491 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2492 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2493 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2494 2495 /* Resize preallocation if overestimated */ 2496 for (i=0;i<lrows;i++) { 2497 dnz[i] = PetscMin(dnz[i],lcols); 2498 onz[i] = PetscMin(onz[i],cols-lcols); 2499 } 2500 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2501 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2502 2503 /* 2504 Set values. Very Basic. 2505 */ 2506 for (i=0;i<local_rows;i++) { 2507 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2508 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2509 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2510 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2511 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2512 } 2513 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2514 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2515 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2516 2517 /* get back mat */ 2518 *M = new_mat; 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatISSubassemble_Private" 2524 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2525 { 2526 Mat subdomain_adj; 2527 IS new_ranks,ranks_send_to; 2528 MatPartitioning partitioner; 2529 Mat_IS *matis; 2530 PetscInt n_neighs,*neighs,*n_shared,**shared; 2531 PetscInt prank; 2532 PetscMPIInt size,rank,color; 2533 PetscInt *xadj,*adjncy,*oldranks; 2534 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2535 PetscInt i,j,n_subdomains,local_size,threshold=0; 2536 PetscErrorCode ierr; 2537 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2538 PetscSubcomm subcomm; 2539 2540 PetscFunctionBegin; 2541 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2542 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2543 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2544 2545 /* Get info on mapping */ 2546 matis = (Mat_IS*)(mat->data); 2547 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2548 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2549 2550 /* build local CSR graph of subdomains' connectivity */ 2551 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2552 xadj[0] = 0; 2553 xadj[1] = PetscMax(n_neighs-1,0); 2554 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2555 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2556 2557 if (threshold) { 2558 PetscInt* count,min_threshold; 2559 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2560 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2561 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2562 for (j=0;j<n_shared[i];j++) { 2563 count[shared[i][j]] += 1; 2564 } 2565 } 2566 /* adapt threshold since we dont want to lose significant connections */ 2567 min_threshold = n_neighs; 2568 for (i=1;i<n_neighs;i++) { 2569 for (j=0;j<n_shared[i];j++) { 2570 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2571 } 2572 } 2573 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2574 threshold = PetscMax(min_threshold+1,threshold); 2575 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2576 xadj[1] = 0; 2577 for (i=1;i<n_neighs;i++) { 2578 for (j=0;j<n_shared[i];j++) { 2579 if (count[shared[i][j]] < threshold) { 2580 adjncy[xadj[1]] = neighs[i]; 2581 adjncy_wgt[xadj[1]] = n_shared[i]; 2582 xadj[1]++; 2583 break; 2584 } 2585 } 2586 } 2587 ierr = PetscFree(count);CHKERRQ(ierr); 2588 } else { 2589 if (xadj[1]) { 2590 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2591 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2592 } 2593 } 2594 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2595 if (use_square) { 2596 for (i=0;i<xadj[1];i++) { 2597 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2598 } 2599 } 2600 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2601 2602 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2603 2604 /* 2605 Restrict work on active processes only. 2606 */ 2607 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2608 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2609 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2610 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2611 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2612 if (color) { 2613 ierr = PetscFree(xadj);CHKERRQ(ierr); 2614 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2615 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2616 } else { 2617 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2618 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2619 prank = rank; 2620 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2621 for (i=0;i<size;i++) { 2622 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2623 } 2624 for (i=0;i<xadj[1];i++) { 2625 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2626 } 2627 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2628 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2629 n_subdomains = (PetscInt)size; 2630 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2631 2632 /* Partition */ 2633 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2634 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2635 if (use_vwgt) { 2636 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2637 v_wgt[0] = local_size; 2638 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2639 } 2640 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2641 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2642 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2643 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2644 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2645 2646 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2647 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2648 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2649 /* clean up */ 2650 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2651 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2652 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2653 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2654 } 2655 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2656 2657 /* assemble parallel IS for sends */ 2658 i = 1; 2659 if (color) i=0; 2660 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2661 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2662 2663 /* get back IS */ 2664 *is_sends = ranks_send_to; 2665 PetscFunctionReturn(0); 2666 } 2667 2668 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2669 2670 #undef __FUNCT__ 2671 #define __FUNCT__ "MatISSubassemble" 2672 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2673 { 2674 Mat local_mat,new_mat; 2675 Mat_IS *matis; 2676 IS is_sends_internal; 2677 PetscInt rows,cols; 2678 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2679 PetscBool ismatis,isdense; 2680 ISLocalToGlobalMapping l2gmap; 2681 PetscInt* l2gmap_indices; 2682 const PetscInt* is_indices; 2683 MatType new_local_type; 2684 MatTypePrivate new_local_type_private; 2685 /* buffers */ 2686 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2687 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2688 /* MPI */ 2689 MPI_Comm comm; 2690 PetscMPIInt n_sends,n_recvs,commsize; 2691 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2692 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2693 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2694 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2695 PetscErrorCode ierr; 2696 2697 PetscFunctionBegin; 2698 /* checks */ 2699 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2700 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2701 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2702 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2703 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2704 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2705 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2706 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2707 PetscValidLogicalCollectiveInt(mat,bs,0); 2708 /* prepare IS for sending if not provided */ 2709 if (!is_sends) { 2710 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2711 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2712 } else { 2713 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2714 is_sends_internal = is_sends; 2715 } 2716 2717 /* get pointer of MATIS data */ 2718 matis = (Mat_IS*)mat->data; 2719 2720 /* get comm */ 2721 comm = PetscObjectComm((PetscObject)mat); 2722 2723 /* compute number of sends */ 2724 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2725 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2726 2727 /* compute number of receives */ 2728 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2729 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2730 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2731 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2732 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2733 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2734 ierr = PetscFree(iflags);CHKERRQ(ierr); 2735 2736 /* prepare send/receive buffers */ 2737 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2738 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2739 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2740 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2741 2742 /* Get data from local mat */ 2743 if (!isdense) { 2744 /* TODO: See below some guidelines on how to prepare the local buffers */ 2745 /* 2746 send_buffer_vals should contain the raw values of the local matrix 2747 send_buffer_idxs should contain: 2748 - MatType_PRIVATE type 2749 - PetscInt size_of_l2gmap 2750 - PetscInt global_row_indices[size_of_l2gmap] 2751 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2752 */ 2753 } else { 2754 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2755 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2756 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2757 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2758 send_buffer_idxs[1] = i; 2759 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2760 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2761 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2762 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2763 for (i=0;i<n_sends;i++) { 2764 ilengths_vals[is_indices[i]] = len*len; 2765 ilengths_idxs[is_indices[i]] = len+2; 2766 } 2767 } 2768 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2769 buf_size_idxs = 0; 2770 buf_size_vals = 0; 2771 for (i=0;i<n_recvs;i++) { 2772 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2773 buf_size_vals += (PetscInt)olengths_vals[i]; 2774 } 2775 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2776 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2777 2778 /* get new tags for clean communications */ 2779 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2780 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2781 2782 /* allocate for requests */ 2783 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2784 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2785 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2786 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2787 2788 /* communications */ 2789 ptr_idxs = recv_buffer_idxs; 2790 ptr_vals = recv_buffer_vals; 2791 for (i=0;i<n_recvs;i++) { 2792 source_dest = onodes[i]; 2793 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2794 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2795 ptr_idxs += olengths_idxs[i]; 2796 ptr_vals += olengths_vals[i]; 2797 } 2798 for (i=0;i<n_sends;i++) { 2799 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2800 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2801 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2802 } 2803 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2804 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2805 2806 /* assemble new l2g map */ 2807 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2808 ptr_idxs = recv_buffer_idxs; 2809 buf_size_idxs = 0; 2810 for (i=0;i<n_recvs;i++) { 2811 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2812 ptr_idxs += olengths_idxs[i]; 2813 } 2814 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2815 ptr_idxs = recv_buffer_idxs; 2816 buf_size_idxs = 0; 2817 for (i=0;i<n_recvs;i++) { 2818 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2819 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2820 ptr_idxs += olengths_idxs[i]; 2821 } 2822 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2823 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2824 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2825 2826 /* infer new local matrix type from received local matrices type */ 2827 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2828 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2829 new_local_type_private = MATAIJ_PRIVATE; 2830 new_local_type = MATSEQAIJ; 2831 if (n_recvs) { 2832 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2833 ptr_idxs = recv_buffer_idxs; 2834 for (i=0;i<n_recvs;i++) { 2835 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2836 new_local_type_private = MATAIJ_PRIVATE; 2837 break; 2838 } 2839 ptr_idxs += olengths_idxs[i]; 2840 } 2841 switch (new_local_type_private) { 2842 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2843 new_local_type = MATSEQAIJ; 2844 bs = 1; 2845 break; 2846 case MATAIJ_PRIVATE: 2847 new_local_type = MATSEQAIJ; 2848 bs = 1; 2849 break; 2850 case MATBAIJ_PRIVATE: 2851 new_local_type = MATSEQBAIJ; 2852 break; 2853 case MATSBAIJ_PRIVATE: 2854 new_local_type = MATSEQSBAIJ; 2855 break; 2856 default: 2857 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2858 break; 2859 } 2860 } 2861 2862 /* create MATIS object */ 2863 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2864 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2865 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2866 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2867 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2868 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2869 2870 /* set values */ 2871 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2872 ptr_vals = recv_buffer_vals; 2873 ptr_idxs = recv_buffer_idxs; 2874 for (i=0;i<n_recvs;i++) { 2875 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2876 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2877 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2878 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2880 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2881 } 2882 ptr_idxs += olengths_idxs[i]; 2883 ptr_vals += olengths_vals[i]; 2884 } 2885 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2887 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2888 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2889 2890 { /* check */ 2891 Vec lvec,rvec; 2892 PetscReal infty_error; 2893 2894 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2895 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2896 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2897 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2898 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2899 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2900 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2901 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2902 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2903 } 2904 2905 /* free workspace */ 2906 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2907 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2908 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2909 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2910 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2911 if (isdense) { 2912 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2913 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2914 } else { 2915 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2916 } 2917 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2918 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2919 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2920 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2921 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2922 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2923 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2924 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2925 ierr = PetscFree(onodes);CHKERRQ(ierr); 2926 /* get back new mat */ 2927 *mat_n = new_mat; 2928 PetscFunctionReturn(0); 2929 } 2930 2931 #undef __FUNCT__ 2932 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2933 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2934 { 2935 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2936 PC_IS *pcis = (PC_IS*)pc->data; 2937 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2938 MatNullSpace CoarseNullSpace=NULL; 2939 ISLocalToGlobalMapping coarse_islg; 2940 IS coarse_is; 2941 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2942 PetscInt im_active=-1,active_procs=-1; 2943 PC pc_temp; 2944 PCType coarse_pc_type; 2945 KSPType coarse_ksp_type; 2946 PetscBool multilevel_requested,multilevel_allowed; 2947 PetscBool setsym,issym,isbddc,isnn; 2948 MatStructure matstruct; 2949 PetscErrorCode ierr; 2950 2951 PetscFunctionBegin; 2952 /* Assign global numbering to coarse dofs */ 2953 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2954 2955 /* infer some info from user */ 2956 issym = PETSC_FALSE; 2957 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2958 multilevel_allowed = PETSC_FALSE; 2959 multilevel_requested = PETSC_FALSE; 2960 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2961 if (multilevel_requested) { 2962 /* count "active processes" */ 2963 im_active = !!(pcis->n); 2964 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2965 if (active_procs/pcbddc->coarsening_ratio < 2) { 2966 multilevel_allowed = PETSC_FALSE; 2967 } else { 2968 multilevel_allowed = PETSC_TRUE; 2969 } 2970 } 2971 2972 /* set defaults for coarse KSP and PC */ 2973 if (multilevel_allowed) { 2974 if (issym) { 2975 coarse_ksp_type = KSPRICHARDSON; 2976 } else { 2977 coarse_ksp_type = KSPCHEBYSHEV; 2978 } 2979 coarse_pc_type = PCBDDC; 2980 } else { 2981 coarse_ksp_type = KSPPREONLY; 2982 coarse_pc_type = PCREDUNDANT; 2983 } 2984 2985 /* create the coarse KSP object only once with defaults */ 2986 if (!pcbddc->coarse_ksp) { 2987 char prefix[256],str_level[3]; 2988 size_t len; 2989 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2990 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2991 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2992 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2993 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2994 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2995 /* prefix */ 2996 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 2997 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2998 if (!pcbddc->current_level) { 2999 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3000 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3001 } else { 3002 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3003 if (pcbddc->current_level>1) len -= 2; 3004 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3005 *(prefix+len)='\0'; 3006 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3007 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3008 } 3009 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3010 } 3011 /* allow user customization */ 3012 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3013 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3014 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3015 3016 /* get some info after set from options */ 3017 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3018 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3019 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3020 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3021 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3022 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3023 isbddc = PETSC_FALSE; 3024 } 3025 3026 /* propagate BDDC info to the next level */ 3027 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3028 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3029 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3030 3031 /* creates temporary MATIS object for coarse matrix */ 3032 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3033 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3034 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3035 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3036 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3037 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3038 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3039 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3040 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3041 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3042 3043 /* assemble coarse matrix */ 3044 if (isbddc || isnn) { 3045 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3046 } else { 3047 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3048 } 3049 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3050 3051 /* create local to global scatters for coarse problem */ 3052 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3053 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3054 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3055 3056 /* propagate symmetry info to coarse matrix */ 3057 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3058 3059 /* Compute coarse null space (special handling by BDDC only) */ 3060 if (pcbddc->NullSpace) { 3061 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3062 if (isbddc) { 3063 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3064 } else { 3065 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3066 } 3067 } 3068 3069 /* set operators */ 3070 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3071 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3072 3073 /* additional KSP customization */ 3074 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3075 if (max_it < 5) { 3076 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3077 } 3078 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3079 3080 3081 /* print some info if requested */ 3082 if (pcbddc->dbg_flag) { 3083 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3084 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3085 if (!multilevel_allowed) { 3086 if (multilevel_requested) { 3087 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3088 } else if (pcbddc->max_levels) { 3089 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3090 } 3091 } 3092 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3093 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3094 } 3095 3096 /* setup coarse ksp */ 3097 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3098 if (pcbddc->dbg_flag) { 3099 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3100 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3101 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3102 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3103 } 3104 3105 /* Check coarse problem if requested */ 3106 if (pcbddc->dbg_flag) { 3107 KSP check_ksp; 3108 KSPType check_ksp_type; 3109 PC check_pc; 3110 Vec check_vec; 3111 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3112 PetscInt its; 3113 PetscBool ispreonly,compute; 3114 3115 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3116 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3117 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3118 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3119 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3120 if (ispreonly) { 3121 check_ksp_type = KSPPREONLY; 3122 compute = PETSC_FALSE; 3123 } else { 3124 if (issym) check_ksp_type = KSPCG; 3125 else check_ksp_type = KSPGMRES; 3126 compute = PETSC_TRUE; 3127 } 3128 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3129 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3130 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3131 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3132 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3133 /* create random vec */ 3134 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3135 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3136 if (CoarseNullSpace) { 3137 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3138 } 3139 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3140 /* solve coarse problem */ 3141 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3142 if (CoarseNullSpace) { 3143 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3144 } 3145 /* check coarse problem residual error */ 3146 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3147 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3148 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3149 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3150 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3152 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3154 /* get eigenvalue estimation if preonly has not been requested */ 3155 if (!ispreonly) { 3156 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3157 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3159 } 3160 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3161 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3162 } 3163 /* free memory */ 3164 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3165 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3166 PetscFunctionReturn(0); 3167 } 3168 3169 #undef __FUNCT__ 3170 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3171 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3172 { 3173 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3174 PC_IS* pcis = (PC_IS*)pc->data; 3175 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3176 PetscInt i,j,coarse_size; 3177 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3178 PetscErrorCode ierr; 3179 3180 PetscFunctionBegin; 3181 /* get indices in local ordering for vertices and constraints */ 3182 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3183 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3184 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3185 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3186 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3187 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3188 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3189 3190 /* Compute global number of coarse dofs */ 3191 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3192 3193 /* check numbering */ 3194 if (pcbddc->dbg_flag) { 3195 PetscScalar coarsesum,*array; 3196 3197 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3198 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3199 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3200 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3201 for (i=0;i<pcbddc->local_primal_size;i++) { 3202 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3203 } 3204 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3205 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3206 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3207 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3208 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3209 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3210 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3211 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3212 for (i=0;i<pcis->n;i++) { 3213 if (array[i] == 1.0) { 3214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3215 } 3216 } 3217 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3218 for (i=0;i<pcis->n;i++) { 3219 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3220 } 3221 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3222 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3223 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3224 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3225 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3226 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3227 if (pcbddc->dbg_flag > 1) { 3228 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3229 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3230 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3231 for (i=0;i<pcbddc->local_primal_size;i++) { 3232 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3233 } 3234 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3235 } 3236 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3237 } 3238 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3239 /* get back data */ 3240 *coarse_size_n = coarse_size; 3241 *local_primal_indices_n = local_primal_indices; 3242 PetscFunctionReturn(0); 3243 } 3244 3245 #undef __FUNCT__ 3246 #define __FUNCT__ "PCBDDCGlobalToLocal" 3247 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3248 { 3249 PC_IS* pcis = (PC_IS*)pc->data; 3250 IS localis_t; 3251 PetscInt i,lsize,*idxs; 3252 PetscScalar *vals; 3253 PetscErrorCode ierr; 3254 3255 PetscFunctionBegin; 3256 /* get dirichlet indices in local ordering exploiting local to global map */ 3257 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3258 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3259 for (i=0;i<lsize;i++) vals[i] = 1.0; 3260 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3261 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3262 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3263 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3264 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3265 ierr = PetscFree(vals);CHKERRQ(ierr); 3266 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3267 /* now compute dirichlet set in local ordering */ 3268 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3269 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3270 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3271 for (i=0,lsize=0;i<pcis->n;i++) { 3272 if (vals[i] == 1.0) { 3273 lsize++; 3274 } 3275 } 3276 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3277 for (i=0,lsize=0;i<pcis->n;i++) { 3278 if (vals[i] == 1.0) { 3279 idxs[lsize++] = i; 3280 } 3281 } 3282 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3283 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3284 *localis = localis_t; 3285 PetscFunctionReturn(0); 3286 } 3287