1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 59 for (i=0;i<pcbddc->n_ISForDofs;i++) { 60 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 61 } 62 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 63 pcbddc->n_ISForDofs = 0; 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCBDDCResetTopography" 69 PetscErrorCode PCBDDCResetTopography(PC pc) 70 { 71 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 76 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 77 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 #undef __FUNCT__ 82 #define __FUNCT__ "PCBDDCResetSolvers" 83 PetscErrorCode PCBDDCResetSolvers(PC pc) 84 { 85 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 86 PetscErrorCode ierr; 87 88 PetscFunctionBegin; 89 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 90 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "PCBDDCCreateWorkVectors" 110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 111 { 112 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 113 PC_IS *pcis = (PC_IS*)pc->data; 114 VecType impVecType; 115 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 120 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 121 local_primal_size = n_constraints+n_vertices; 122 n_R = pcis->n-n_vertices; 123 /* local work vectors */ 124 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 125 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 126 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 127 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 128 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 129 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 130 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 131 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 132 if (n_constraints) { 133 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 134 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 135 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 136 } 137 PetscFunctionReturn(0); 138 } 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "PCBDDCSetUpCorrection" 142 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 143 { 144 PetscErrorCode ierr; 145 /* pointers to pcis and pcbddc */ 146 PC_IS* pcis = (PC_IS*)pc->data; 147 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 148 /* submatrices of local problem */ 149 Mat A_RV,A_VR,A_VV; 150 /* working matrices */ 151 Mat M1,M2,M3,C_CR; 152 /* working vectors */ 153 Vec vec1_C,vec2_C,vec1_V,vec2_V; 154 /* additional working stuff */ 155 IS is_aux; 156 ISLocalToGlobalMapping BtoNmap; 157 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 158 const PetscScalar *array,*row_cmat_values; 159 const PetscInt *row_cmat_indices,*idx_R_local; 160 PetscInt *vertices,*idx_V_B,*auxindices; 161 PetscInt n_vertices,n_constraints,size_of_constraint; 162 PetscInt i,j,n_R,n_D,n_B; 163 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 164 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 165 MatType impMatType; 166 /* some shortcuts to scalars */ 167 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 168 /* for debugging purposes */ 169 PetscReal *coarsefunctions_errors,*constraints_errors; 170 171 PetscFunctionBegin; 172 /* get number of vertices and their local indices */ 173 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 174 n_constraints = pcbddc->local_primal_size-n_vertices; 175 /* Set Non-overlapping dimensions */ 176 n_B = pcis->n_B; n_D = pcis->n - n_B; 177 n_R = pcis->n-n_vertices; 178 179 /* Set types for local objects needed by BDDC precondtioner */ 180 impMatType = MATSEQDENSE; 181 182 /* Allocating some extra storage just to be safe */ 183 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 184 for (i=0;i<pcis->n;i++) auxindices[i]=i; 185 186 /* vertices in boundary numbering */ 187 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 188 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 189 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 190 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 191 if (i != n_vertices) { 192 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 193 } 194 195 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 196 if (n_constraints) { 197 /* work vectors */ 198 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 199 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 200 /* auxiliary matrices */ 201 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 202 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 203 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 204 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 205 206 /* Extract constraints on R nodes: C_{CR} */ 207 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 208 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 209 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 210 211 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 212 for (i=0;i<n_constraints;i++) { 213 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 214 /* Get row of constraint matrix in R numbering */ 215 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 216 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 217 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 218 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 219 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 220 /* Solve for row of constraint matrix in R numbering */ 221 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 222 /* Set values in local_auxmat2 */ 223 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 224 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 225 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 226 } 227 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 228 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 229 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 230 231 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 232 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 233 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 234 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 235 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 236 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 237 ierr = MatSetUp(M1);CHKERRQ(ierr); 238 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 239 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 240 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 241 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 242 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 243 ierr = MatDestroy(&M2);CHKERRQ(ierr); 244 ierr = MatDestroy(&M3);CHKERRQ(ierr); 245 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 246 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 247 } 248 249 /* Get submatrices from subdomain matrix */ 250 if (n_vertices) { 251 PetscInt ibs,mbs; 252 PetscBool issbaij; 253 Mat newmat; 254 255 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 256 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 257 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 258 if (ibs != mbs) { /* need to convert to SEQAIJ */ 259 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 260 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 261 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 262 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 263 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 264 } else { 265 /* this is safe */ 266 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 267 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 268 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 269 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 270 /* which of the two approaches is faster? */ 271 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 272 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 273 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 274 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 275 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 276 } else { 277 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 278 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 279 } 280 } 281 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 282 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 283 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 284 } 285 286 /* Matrix of coarse basis functions (local) */ 287 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 288 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 289 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 290 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 291 if (pcbddc->switch_static || pcbddc->dbg_flag) { 292 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 293 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 294 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 295 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 296 } 297 298 if (pcbddc->dbg_flag) { 299 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 300 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 301 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 302 } 303 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 304 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 305 306 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 307 308 /* vertices */ 309 for (i=0;i<n_vertices;i++) { 310 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 311 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 312 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 313 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 314 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 315 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 316 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 317 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 318 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 319 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 320 if (n_constraints) { 321 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 322 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 323 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 324 } 325 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 326 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 327 328 /* Set values in coarse basis function and subdomain part of coarse_mat */ 329 /* coarse basis functions */ 330 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 331 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 332 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 333 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 334 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 335 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 336 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 337 if (pcbddc->switch_static || pcbddc->dbg_flag) { 338 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 339 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 340 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 341 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 342 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 343 } 344 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 345 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 346 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 347 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 348 if (n_constraints) { 349 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 350 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 351 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 352 } 353 354 /* check */ 355 if (pcbddc->dbg_flag) { 356 /* assemble subdomain vector on local nodes */ 357 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 358 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 359 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 360 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 361 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 362 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 363 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 364 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 365 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 366 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 367 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 368 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 369 if (n_constraints) { 370 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 371 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 372 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 373 } 374 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 375 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 376 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 377 /* check saddle point solution */ 378 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 379 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 380 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 381 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 382 /* shift by the identity matrix */ 383 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 384 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 385 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 386 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 387 } 388 } 389 390 /* constraints */ 391 for (i=0;i<n_constraints;i++) { 392 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 393 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 394 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 395 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 396 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 397 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 398 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 399 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 400 if (n_vertices) { 401 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 402 } 403 /* Set values in coarse basis function and subdomain part of coarse_mat */ 404 /* coarse basis functions */ 405 j = i+n_vertices; /* don't touch this! */ 406 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 407 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 408 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 409 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 410 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 412 if (pcbddc->switch_static || pcbddc->dbg_flag) { 413 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 414 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 415 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 416 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 417 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 418 } 419 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 420 if (n_vertices) { 421 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 422 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 424 } 425 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 426 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 428 429 if (pcbddc->dbg_flag) { 430 /* assemble subdomain vector on nodes */ 431 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 432 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 433 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 435 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 436 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 437 /* assemble subdomain vector of lagrange multipliers */ 438 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 439 if (n_vertices) { 440 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 441 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 442 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 443 } 444 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 445 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 446 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 447 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 450 /* check saddle point solution */ 451 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 452 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 453 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 454 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 455 /* shift by the identity matrix */ 456 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 457 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 458 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 459 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 460 } 461 } 462 /* call assembling routines for local coarse basis */ 463 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 464 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 465 if (pcbddc->switch_static || pcbddc->dbg_flag) { 466 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 468 } 469 470 /* compute other basis functions for non-symmetric problems */ 471 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 472 if (!setsym || (setsym && !issym)) { 473 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 474 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 475 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 476 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 477 if (pcbddc->switch_static || pcbddc->dbg_flag) { 478 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 479 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 480 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 481 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 482 } 483 for (i=0;i<pcbddc->local_primal_size;i++) { 484 if (n_constraints) { 485 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 486 for (j=0;j<n_constraints;j++) { 487 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 488 } 489 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 490 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 491 } 492 if (i<n_vertices) { 493 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 494 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 495 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 496 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 497 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 498 if (n_constraints) { 499 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 500 } 501 } else { 502 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 503 } 504 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 505 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 506 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 507 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 508 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 509 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 511 if (i<n_vertices) { 512 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 513 } 514 if (pcbddc->switch_static || pcbddc->dbg_flag) { 515 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 516 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 517 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 518 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 519 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 520 } 521 522 if (pcbddc->dbg_flag) { 523 /* assemble subdomain vector on nodes */ 524 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 525 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 526 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 527 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 528 if (i<n_vertices) { 529 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 530 } 531 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 532 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 533 /* assemble subdomain vector of lagrange multipliers */ 534 for (j=0;j<pcbddc->local_primal_size;j++) { 535 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 536 } 537 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 538 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 539 /* check saddle point solution */ 540 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 541 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 542 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 543 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 544 /* shift by the identity matrix */ 545 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 546 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 547 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 548 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 549 } 550 } 551 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 if (pcbddc->switch_static || pcbddc->dbg_flag) { 554 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 556 } 557 } 558 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 559 /* Checking coarse_sub_mat and coarse basis functios */ 560 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 561 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 562 if (pcbddc->dbg_flag) { 563 Mat coarse_sub_mat; 564 Mat AUXMAT,TM1,TM2,TM3,TM4; 565 Mat coarse_phi_D,coarse_phi_B; 566 Mat coarse_psi_D,coarse_psi_B; 567 Mat A_II,A_BB,A_IB,A_BI; 568 MatType checkmattype=MATSEQAIJ; 569 PetscReal real_value; 570 571 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 572 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 573 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 574 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 575 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 576 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 577 if (pcbddc->coarse_psi_B) { 578 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 579 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 580 } 581 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 582 583 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 584 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 585 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 586 if (pcbddc->coarse_psi_B) { 587 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 588 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 589 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 590 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 591 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 592 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 593 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 594 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 595 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 596 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 597 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 598 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 599 } else { 600 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 601 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 602 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 603 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 604 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 605 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 606 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 607 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 608 } 609 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 610 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 611 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 612 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 613 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 614 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 615 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 619 for (i=0;i<pcbddc->local_primal_size;i++) { 620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 621 } 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 623 for (i=0;i<pcbddc->local_primal_size;i++) { 624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 625 } 626 if (pcbddc->coarse_psi_B) { 627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 628 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 630 } 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 632 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 634 } 635 } 636 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 637 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 638 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 639 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 640 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 641 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 642 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 643 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 644 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 645 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 646 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 647 if (pcbddc->coarse_psi_B) { 648 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 649 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 650 } 651 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 652 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 653 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 654 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 655 } 656 /* free memory */ 657 if (n_vertices) { 658 ierr = PetscFree(vertices);CHKERRQ(ierr); 659 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 660 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 661 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 662 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 663 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 664 } 665 if (n_constraints) { 666 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 667 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 668 ierr = MatDestroy(&M1);CHKERRQ(ierr); 669 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 670 } 671 ierr = PetscFree(auxindices);CHKERRQ(ierr); 672 /* get back data */ 673 *coarse_submat_vals_n = coarse_submat_vals; 674 PetscFunctionReturn(0); 675 } 676 677 #undef __FUNCT__ 678 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 679 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 680 { 681 PC_IS* pcis = (PC_IS*)(pc->data); 682 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 683 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 684 PetscBool issbaij,isbaij; 685 /* manage repeated solves */ 686 MatReuse reuse; 687 MatStructure matstruct; 688 PetscErrorCode ierr; 689 690 PetscFunctionBegin; 691 /* get mat flags */ 692 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 693 reuse = MAT_INITIAL_MATRIX; 694 if (pc->setupcalled) { 695 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 696 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 697 if (matstruct == SAME_NONZERO_PATTERN) { 698 reuse = MAT_REUSE_MATRIX; 699 } else { 700 reuse = MAT_INITIAL_MATRIX; 701 } 702 } 703 if (reuse == MAT_INITIAL_MATRIX) { 704 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 705 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 706 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 707 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 708 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 709 } 710 711 /* transform local matrices if needed */ 712 if (pcbddc->use_change_of_basis) { 713 Mat change_mat_all; 714 PetscScalar *row_cmat_values; 715 PetscInt *row_cmat_indices; 716 PetscInt *nnz,*is_indices,*temp_indices; 717 PetscInt i,j,k,n_D,n_B; 718 719 /* Get Non-overlapping dimensions */ 720 n_B = pcis->n_B; 721 n_D = pcis->n-n_B; 722 723 /* compute nonzero structure of change of basis on all local nodes */ 724 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 725 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 726 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 727 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 728 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 729 k=1; 730 for (i=0;i<n_B;i++) { 731 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 732 nnz[is_indices[i]]=j; 733 if (k < j) k = j; 734 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 735 } 736 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 737 /* assemble change of basis matrix on the whole set of local dofs */ 738 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 739 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 740 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 741 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 742 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 743 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 744 for (i=0;i<n_D;i++) { 745 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 746 } 747 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 748 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 749 for (i=0;i<n_B;i++) { 750 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 751 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 752 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 753 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 754 } 755 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 756 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 757 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 758 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 759 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 760 if (!issbaij && !isbaij) { 761 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 762 } else { 763 Mat work_mat; 764 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 765 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 766 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 767 } 768 /* 769 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 770 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 771 */ 772 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 773 ierr = PetscFree(nnz);CHKERRQ(ierr); 774 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 775 } else { 776 /* without change of basis, the local matrix is unchanged */ 777 if (!pcbddc->local_mat) { 778 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 779 pcbddc->local_mat = matis->A; 780 } 781 } 782 783 /* get submatrices */ 784 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 785 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 786 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 787 if (!issbaij) { 788 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 789 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 790 } else { 791 Mat newmat; 792 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 793 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 794 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 795 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 796 } 797 PetscFunctionReturn(0); 798 } 799 800 #undef __FUNCT__ 801 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 802 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 803 { 804 PC_IS* pcis = (PC_IS*)(pc->data); 805 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 806 IS is_aux1,is_aux2; 807 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 808 PetscInt n_vertices,i,j,n_R,n_D,n_B; 809 PetscInt vbs,bs; 810 PetscBT bitmask; 811 PetscErrorCode ierr; 812 813 PetscFunctionBegin; 814 /* Set Non-overlapping dimensions */ 815 n_B = pcis->n_B; n_D = pcis->n - n_B; 816 /* get vertex indices from constraint matrix */ 817 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 818 819 /* create auxiliary bitmask */ 820 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 821 for (i=0;i<n_vertices;i++) { 822 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 823 } 824 825 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 826 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 827 for (i=0, n_R=0; i<pcis->n; i++) { 828 if (!PetscBTLookup(bitmask,i)) { 829 idx_R_local[n_R] = i; 830 n_R++; 831 } 832 } 833 834 /* Block code */ 835 vbs = 1; 836 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 837 if (bs>1 && !(n_vertices%bs)) { 838 PetscBool is_blocked = PETSC_TRUE; 839 PetscInt *vary; 840 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 841 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 842 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 843 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 844 for (i=0; i<n_vertices; i++) { 845 if (vary[i]!=0 && vary[i]!=bs) { 846 is_blocked = PETSC_FALSE; 847 break; 848 } 849 } 850 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 851 vbs = bs; 852 for (i=0;i<n_R/vbs;i++) { 853 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 854 } 855 } 856 ierr = PetscFree(vary);CHKERRQ(ierr); 857 } 858 ierr = PetscFree(vertices);CHKERRQ(ierr); 859 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 860 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 861 862 /* print some info if requested */ 863 if (pcbddc->dbg_flag) { 864 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 865 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 866 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 867 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 868 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 869 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 870 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 871 } 872 873 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 874 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 875 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 876 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 877 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 878 for (i=0; i<n_D; i++) { 879 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 880 } 881 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 882 for (i=0, j=0; i<n_R; i++) { 883 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 884 aux_array1[j++] = i; 885 } 886 } 887 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 888 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 889 for (i=0, j=0; i<n_B; i++) { 890 if (!PetscBTLookup(bitmask,is_indices[i])) { 891 aux_array2[j++] = i; 892 } 893 } 894 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 895 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 896 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 897 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 898 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 899 900 if (pcbddc->switch_static || pcbddc->dbg_flag) { 901 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 902 for (i=0, j=0; i<n_R; i++) { 903 if (PetscBTLookup(bitmask,idx_R_local[i])) { 904 aux_array1[j++] = i; 905 } 906 } 907 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 908 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 909 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 910 } 911 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 912 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 913 PetscFunctionReturn(0); 914 } 915 916 917 #undef __FUNCT__ 918 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 919 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 920 { 921 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 922 PC_IS *pcis = (PC_IS*)pc->data; 923 PC pc_temp; 924 Mat A_RR; 925 Vec vec1,vec2,vec3; 926 MatStructure matstruct; 927 PetscScalar m_one = -1.0; 928 PetscReal value; 929 PetscInt n_D,n_R,ibs,mbs; 930 PetscBool use_exact,use_exact_reduced,issbaij; 931 PetscErrorCode ierr; 932 /* prefixes stuff */ 933 char dir_prefix[256],neu_prefix[256],str_level[3]; 934 size_t len; 935 936 PetscFunctionBegin; 937 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 938 939 /* compute prefixes */ 940 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 941 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 942 if (!pcbddc->current_level) { 943 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 944 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 945 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 946 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 947 } else { 948 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 949 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 950 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 951 len -= 15; /* remove "pc_bddc_coarse_" */ 952 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 953 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 954 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 955 *(dir_prefix+len)='\0'; 956 *(neu_prefix+len)='\0'; 957 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 958 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 959 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 960 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 961 } 962 963 /* DIRICHLET PROBLEM */ 964 /* Matrix for Dirichlet problem is pcis->A_II */ 965 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 966 if (!pcbddc->ksp_D) { /* create object if not yet build */ 967 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 968 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 969 /* default */ 970 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 971 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 972 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 973 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 974 if (issbaij) { 975 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 976 } else { 977 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 978 } 979 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 980 } 981 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 982 /* Allow user's customization */ 983 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 984 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 985 if (!n_D) { 986 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 987 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 988 } 989 /* Set Up KSP for Dirichlet problem of BDDC */ 990 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 991 /* set ksp_D into pcis data */ 992 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 993 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 994 pcis->ksp_D = pcbddc->ksp_D; 995 996 /* NEUMANN PROBLEM */ 997 /* Matrix for Neumann problem is A_RR -> we need to create it */ 998 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 999 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1000 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1001 if (ibs != mbs) { 1002 Mat newmat; 1003 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1004 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1005 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1006 } else { 1007 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1008 } 1009 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1010 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1011 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1012 /* default */ 1013 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1014 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1015 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1016 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1017 if (issbaij) { 1018 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1019 } else { 1020 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1021 } 1022 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1023 } 1024 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1025 /* Allow user's customization */ 1026 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1027 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1028 if (!n_R) { 1029 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1030 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1031 } 1032 /* Set Up KSP for Neumann problem of BDDC */ 1033 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1034 1035 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1036 1037 /* Dirichlet */ 1038 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1039 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1040 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1041 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1042 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1043 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1044 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1045 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1046 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1047 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1048 /* need to be adapted? */ 1049 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1050 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1051 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1052 /* print info */ 1053 if (pcbddc->dbg_flag) { 1054 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1055 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1056 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1057 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1058 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1059 } 1060 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1061 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1062 } 1063 1064 /* Neumann */ 1065 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1066 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1067 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1068 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1069 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1070 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1071 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1072 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1073 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1074 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1075 /* need to be adapted? */ 1076 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1077 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1078 /* print info */ 1079 if (pcbddc->dbg_flag) { 1080 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1081 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1082 } 1083 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1084 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1085 } 1086 /* free Neumann problem's matrix */ 1087 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1093 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1094 { 1095 PetscErrorCode ierr; 1096 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1097 1098 PetscFunctionBegin; 1099 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1100 if (pcbddc->local_auxmat1) { 1101 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1102 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1103 } 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1109 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1110 { 1111 PetscErrorCode ierr; 1112 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1113 PC_IS* pcis = (PC_IS*) (pc->data); 1114 const PetscScalar zero = 0.0; 1115 1116 PetscFunctionBegin; 1117 /* Application of PHI^T (or PSI^T) */ 1118 if (pcbddc->coarse_psi_B) { 1119 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1120 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1121 } else { 1122 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1123 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1124 } 1125 /* Scatter data of coarse_rhs */ 1126 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1127 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1128 1129 /* Local solution on R nodes */ 1130 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1131 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1132 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1133 if (pcbddc->switch_static) { 1134 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1135 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1136 } 1137 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1138 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1139 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 if (pcbddc->switch_static) { 1142 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1143 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144 } 1145 1146 /* Coarse solution */ 1147 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1148 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1149 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1150 } 1151 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1152 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1153 1154 /* Sum contributions from two levels */ 1155 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1156 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1162 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1163 { 1164 PetscErrorCode ierr; 1165 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1166 1167 PetscFunctionBegin; 1168 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1174 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1175 { 1176 PetscErrorCode ierr; 1177 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1178 1179 PetscFunctionBegin; 1180 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 /* uncomment for testing purposes */ 1185 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1188 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1189 { 1190 PetscErrorCode ierr; 1191 PC_IS* pcis = (PC_IS*)(pc->data); 1192 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1193 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1194 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1195 MatType impMatType=MATSEQAIJ; 1196 /* one and zero */ 1197 PetscScalar one=1.0,zero=0.0; 1198 /* space to store constraints and their local indices */ 1199 PetscScalar *temp_quadrature_constraint; 1200 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1201 /* iterators */ 1202 PetscInt i,j,k,total_counts,temp_start_ptr; 1203 /* stuff to store connected components stored in pcbddc->mat_graph */ 1204 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1205 PetscInt n_ISForFaces,n_ISForEdges; 1206 /* near null space stuff */ 1207 MatNullSpace nearnullsp; 1208 const Vec *nearnullvecs; 1209 Vec *localnearnullsp; 1210 PetscBool nnsp_has_cnst; 1211 PetscInt nnsp_size; 1212 PetscScalar *array; 1213 /* BLAS integers */ 1214 PetscBLASInt lwork,lierr; 1215 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1216 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1217 /* LAPACK working arrays for SVD or POD */ 1218 PetscBool skip_lapack; 1219 PetscScalar *work; 1220 PetscReal *singular_vals; 1221 #if defined(PETSC_USE_COMPLEX) 1222 PetscReal *rwork; 1223 #endif 1224 #if defined(PETSC_MISSING_LAPACK_GESVD) 1225 PetscBLASInt Blas_one_2=1; 1226 PetscScalar *temp_basis,*correlation_mat; 1227 #else 1228 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1229 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1230 #endif 1231 /* change of basis */ 1232 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1233 PetscBool boolforchange,qr_needed; 1234 PetscBT touched,change_basis,qr_needed_idx; 1235 /* auxiliary stuff */ 1236 PetscInt *nnz,*is_indices,*local_to_B; 1237 /* some quantities */ 1238 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1239 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1240 1241 1242 PetscFunctionBegin; 1243 /* Destroy Mat objects computed previously */ 1244 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1245 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1246 /* Get index sets for faces, edges and vertices from graph */ 1247 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1248 pcbddc->use_vertices = PETSC_TRUE; 1249 } 1250 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1251 /* HACK: provide functions to set change of basis */ 1252 if (!ISForVertices && pcbddc->NullSpace) { 1253 pcbddc->use_change_of_basis = PETSC_TRUE; 1254 pcbddc->use_change_on_faces = PETSC_FALSE; 1255 } 1256 /* print some info */ 1257 if (pcbddc->dbg_flag) { 1258 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1259 i = 0; 1260 if (ISForVertices) { 1261 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1262 } 1263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1266 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1267 } 1268 /* check if near null space is attached to global mat */ 1269 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1270 if (nearnullsp) { 1271 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1272 } else { /* if near null space is not provided BDDC uses constants by default */ 1273 nnsp_size = 0; 1274 nnsp_has_cnst = PETSC_TRUE; 1275 } 1276 /* get max number of constraints on a single cc */ 1277 max_constraints = nnsp_size; 1278 if (nnsp_has_cnst) max_constraints++; 1279 1280 /* 1281 Evaluate maximum storage size needed by the procedure 1282 - temp_indices will contain start index of each constraint stored as follows 1283 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1284 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1285 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1286 */ 1287 total_counts = n_ISForFaces+n_ISForEdges; 1288 total_counts *= max_constraints; 1289 n_vertices = 0; 1290 if (ISForVertices) { 1291 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1292 } 1293 total_counts += n_vertices; 1294 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1295 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1296 total_counts = 0; 1297 max_size_of_constraint = 0; 1298 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1299 if (i<n_ISForEdges) { 1300 used_IS = &ISForEdges[i]; 1301 } else { 1302 used_IS = &ISForFaces[i-n_ISForEdges]; 1303 } 1304 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1305 total_counts += j; 1306 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1307 } 1308 total_counts *= max_constraints; 1309 total_counts += n_vertices; 1310 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1311 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1312 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1313 /* local to boundary numbering */ 1314 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1315 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1316 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1317 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1318 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1319 /* get local part of global near null space vectors */ 1320 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1321 for (k=0;k<nnsp_size;k++) { 1322 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1323 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1324 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1325 } 1326 1327 /* whether or not to skip lapack calls */ 1328 skip_lapack = PETSC_TRUE; 1329 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1330 1331 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1332 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1333 PetscScalar temp_work; 1334 #if defined(PETSC_MISSING_LAPACK_GESVD) 1335 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1336 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1337 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1338 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1339 #if defined(PETSC_USE_COMPLEX) 1340 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1341 #endif 1342 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1343 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1344 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1345 lwork = -1; 1346 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1347 #if !defined(PETSC_USE_COMPLEX) 1348 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1349 #else 1350 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1351 #endif 1352 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1353 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1354 #else /* on missing GESVD */ 1355 /* SVD */ 1356 PetscInt max_n,min_n; 1357 max_n = max_size_of_constraint; 1358 min_n = max_constraints; 1359 if (max_size_of_constraint < max_constraints) { 1360 min_n = max_size_of_constraint; 1361 max_n = max_constraints; 1362 } 1363 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1364 #if defined(PETSC_USE_COMPLEX) 1365 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1366 #endif 1367 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1368 lwork = -1; 1369 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1370 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1371 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1372 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1373 #if !defined(PETSC_USE_COMPLEX) 1374 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1375 #else 1376 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1377 #endif 1378 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1379 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1380 #endif /* on missing GESVD */ 1381 /* Allocate optimal workspace */ 1382 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1383 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1384 } 1385 /* Now we can loop on constraining sets */ 1386 total_counts = 0; 1387 temp_indices[0] = 0; 1388 /* vertices */ 1389 if (ISForVertices) { 1390 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1391 if (nnsp_has_cnst) { /* consider all vertices */ 1392 for (i=0;i<n_vertices;i++) { 1393 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1394 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1395 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1396 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1397 total_counts++; 1398 } 1399 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1400 PetscBool used_vertex; 1401 for (i=0;i<n_vertices;i++) { 1402 used_vertex = PETSC_FALSE; 1403 k = 0; 1404 while (!used_vertex && k<nnsp_size) { 1405 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1406 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1407 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1408 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1409 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1410 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1411 total_counts++; 1412 used_vertex = PETSC_TRUE; 1413 } 1414 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1415 k++; 1416 } 1417 } 1418 } 1419 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1420 n_vertices = total_counts; 1421 } 1422 1423 /* edges and faces */ 1424 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1425 if (i<n_ISForEdges) { 1426 used_IS = &ISForEdges[i]; 1427 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1428 } else { 1429 used_IS = &ISForFaces[i-n_ISForEdges]; 1430 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1431 } 1432 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1433 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1434 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1435 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1436 /* change of basis should not be performed on local periodic nodes */ 1437 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1438 if (nnsp_has_cnst) { 1439 PetscScalar quad_value; 1440 temp_constraints++; 1441 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1442 for (j=0;j<size_of_constraint;j++) { 1443 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1444 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1445 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1446 } 1447 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1448 total_counts++; 1449 } 1450 for (k=0;k<nnsp_size;k++) { 1451 PetscReal real_value; 1452 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1453 for (j=0;j<size_of_constraint;j++) { 1454 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1455 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1456 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1457 } 1458 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1459 /* check if array is null on the connected component */ 1460 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1461 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1462 if (real_value > 0.0) { /* keep indices and values */ 1463 temp_constraints++; 1464 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1465 total_counts++; 1466 } 1467 } 1468 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1469 valid_constraints = temp_constraints; 1470 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1471 if (!pcbddc->use_nnsp_true && temp_constraints) { 1472 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1473 1474 #if defined(PETSC_MISSING_LAPACK_GESVD) 1475 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1476 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1477 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1478 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1479 from that computed using LAPACKgesvd 1480 -> This is due to a different computation of eigenvectors in LAPACKheev 1481 -> The quality of the POD-computed basis will be the same */ 1482 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1483 /* Store upper triangular part of correlation matrix */ 1484 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1485 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1486 for (j=0;j<temp_constraints;j++) { 1487 for (k=0;k<j+1;k++) { 1488 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1489 } 1490 } 1491 /* compute eigenvalues and eigenvectors of correlation matrix */ 1492 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1493 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1494 #if !defined(PETSC_USE_COMPLEX) 1495 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1496 #else 1497 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1498 #endif 1499 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1500 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1501 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1502 j=0; 1503 while (j < temp_constraints && singular_vals[j] < tol) j++; 1504 total_counts=total_counts-j; 1505 valid_constraints = temp_constraints-j; 1506 /* scale and copy POD basis into used quadrature memory */ 1507 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1508 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1509 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1510 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1511 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1512 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1513 if (j<temp_constraints) { 1514 PetscInt ii; 1515 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1516 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1517 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1518 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1519 for (k=0;k<temp_constraints-j;k++) { 1520 for (ii=0;ii<size_of_constraint;ii++) { 1521 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1522 } 1523 } 1524 } 1525 #else /* on missing GESVD */ 1526 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1527 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1529 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1530 #if !defined(PETSC_USE_COMPLEX) 1531 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1532 #else 1533 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1534 #endif 1535 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1536 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1537 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1538 k = temp_constraints; 1539 if (k > size_of_constraint) k = size_of_constraint; 1540 j = 0; 1541 while (j < k && singular_vals[k-j-1] < tol) j++; 1542 total_counts = total_counts-temp_constraints+k-j; 1543 valid_constraints = k-j; 1544 #endif /* on missing GESVD */ 1545 } 1546 /* setting change_of_basis flag is safe now */ 1547 if (boolforchange) { 1548 for (j=0;j<valid_constraints;j++) { 1549 PetscBTSet(change_basis,total_counts-j-1); 1550 } 1551 } 1552 } 1553 /* free index sets of faces, edges and vertices */ 1554 for (i=0;i<n_ISForFaces;i++) { 1555 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1556 } 1557 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1558 for (i=0;i<n_ISForEdges;i++) { 1559 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1560 } 1561 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1562 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1563 1564 /* free workspace */ 1565 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1566 ierr = PetscFree(work);CHKERRQ(ierr); 1567 #if defined(PETSC_USE_COMPLEX) 1568 ierr = PetscFree(rwork);CHKERRQ(ierr); 1569 #endif 1570 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1571 #if defined(PETSC_MISSING_LAPACK_GESVD) 1572 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1573 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1574 #endif 1575 } 1576 for (k=0;k<nnsp_size;k++) { 1577 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1578 } 1579 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1580 1581 /* set quantities in pcbddc data structure */ 1582 /* n_vertices defines the number of subdomain corners in the primal space */ 1583 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1584 pcbddc->local_primal_size = total_counts; 1585 pcbddc->n_vertices = n_vertices; 1586 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1587 1588 /* Create constraint matrix */ 1589 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1590 /* so we need to set it up properly either with or without change of basis */ 1591 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1592 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1593 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1594 /* array to compute a local numbering of constraints : vertices first then constraints */ 1595 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1596 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1597 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1598 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1599 /* auxiliary stuff for basis change */ 1600 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1601 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1602 1603 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1604 total_primal_vertices=0; 1605 for (i=0;i<pcbddc->local_primal_size;i++) { 1606 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1607 if (size_of_constraint == 1) { 1608 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1609 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1610 aux_primal_minloc[total_primal_vertices]=0; 1611 total_primal_vertices++; 1612 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1613 PetscInt min_loc,min_index; 1614 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1615 /* find first untouched local node */ 1616 k = 0; 1617 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1618 min_index = global_indices[k]; 1619 min_loc = k; 1620 /* search the minimum among global nodes already untouched on the cc */ 1621 for (k=1;k<size_of_constraint;k++) { 1622 /* there can be more than one constraint on a single connected component */ 1623 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1624 min_index = global_indices[k]; 1625 min_loc = k; 1626 } 1627 } 1628 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1629 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1630 aux_primal_minloc[total_primal_vertices]=min_loc; 1631 total_primal_vertices++; 1632 } 1633 } 1634 /* determine if a QR strategy is needed for change of basis */ 1635 qr_needed = PETSC_FALSE; 1636 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1637 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1638 if (PetscBTLookup(change_basis,i)) { 1639 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1640 j = 0; 1641 for (k=0;k<size_of_constraint;k++) { 1642 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1643 j++; 1644 } 1645 } 1646 /* found more than one primal dof on the cc */ 1647 if (j > 1) { 1648 PetscBTSet(qr_needed_idx,i); 1649 qr_needed = PETSC_TRUE; 1650 } 1651 } 1652 } 1653 /* free workspace */ 1654 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1655 1656 /* permute indices in order to have a sorted set of vertices */ 1657 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1658 1659 /* nonzero structure of constraint matrix */ 1660 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1661 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1662 j=total_primal_vertices; 1663 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1664 if (!PetscBTLookup(change_basis,i)) { 1665 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1666 j++; 1667 } 1668 } 1669 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1670 ierr = PetscFree(nnz);CHKERRQ(ierr); 1671 /* set values in constraint matrix */ 1672 for (i=0;i<total_primal_vertices;i++) { 1673 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1674 } 1675 total_counts = total_primal_vertices; 1676 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1677 if (!PetscBTLookup(change_basis,i)) { 1678 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1679 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1680 total_counts++; 1681 } 1682 } 1683 /* assembling */ 1684 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 /* 1687 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1688 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1689 */ 1690 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1691 if (pcbddc->use_change_of_basis) { 1692 /* dual and primal dofs on a single cc */ 1693 PetscInt dual_dofs,primal_dofs; 1694 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1695 PetscInt primal_counter; 1696 /* working stuff for GEQRF */ 1697 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1698 PetscBLASInt lqr_work; 1699 /* working stuff for UNGQR */ 1700 PetscScalar *gqr_work,lgqr_work_t; 1701 PetscBLASInt lgqr_work; 1702 /* working stuff for TRTRS */ 1703 PetscScalar *trs_rhs; 1704 PetscBLASInt Blas_NRHS; 1705 /* pointers for values insertion into change of basis matrix */ 1706 PetscInt *start_rows,*start_cols; 1707 PetscScalar *start_vals; 1708 /* working stuff for values insertion */ 1709 PetscBT is_primal; 1710 1711 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1712 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1713 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1714 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1715 /* work arrays */ 1716 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1717 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1718 /* nonzeros per row */ 1719 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1720 if (PetscBTLookup(change_basis,i)) { 1721 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1722 if (PetscBTLookup(qr_needed_idx,i)) { 1723 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1724 } else { 1725 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1726 /* get local primal index on the cc */ 1727 j = 0; 1728 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1729 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1730 } 1731 } 1732 } 1733 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1734 ierr = PetscFree(nnz);CHKERRQ(ierr); 1735 /* Set initial identity in the matrix */ 1736 for (i=0;i<pcis->n_B;i++) { 1737 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1738 } 1739 1740 if (pcbddc->dbg_flag) { 1741 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1742 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1743 } 1744 1745 1746 /* Now we loop on the constraints which need a change of basis */ 1747 /* 1748 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1749 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1750 1751 Basic blocks of change of basis matrix T computed 1752 1753 - Using the following block transformation if there is only a primal dof on the cc 1754 (in the example, primal dof is the last one of the edge in LOCAL ordering 1755 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1756 | 1 0 ... 0 1 | 1757 | 0 1 ... 0 1 | 1758 | ... | 1759 | 0 ... 1 1 | 1760 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1761 1762 - via QR decomposition of constraints otherwise 1763 */ 1764 if (qr_needed) { 1765 /* space to store Q */ 1766 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1767 /* first we issue queries for optimal work */ 1768 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1769 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1770 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1771 lqr_work = -1; 1772 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1773 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1774 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1775 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1776 lgqr_work = -1; 1777 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1778 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1779 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1780 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1781 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1782 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1783 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1784 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1785 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1786 /* array to store scaling factors for reflectors */ 1787 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1788 /* array to store rhs and solution of triangular solver */ 1789 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1790 /* allocating workspace for check */ 1791 if (pcbddc->dbg_flag) { 1792 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1793 } 1794 } 1795 /* array to store whether a node is primal or not */ 1796 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1797 for (i=0;i<total_primal_vertices;i++) { 1798 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1799 } 1800 1801 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1802 /* -> using implicit ordering contained in temp_indices data */ 1803 total_counts = pcbddc->n_vertices; 1804 primal_counter = total_counts; 1805 while (total_counts<pcbddc->local_primal_size) { 1806 primal_dofs = 1; 1807 if (PetscBTLookup(change_basis,total_counts)) { 1808 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1809 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1810 primal_dofs++; 1811 } 1812 /* get constraint info */ 1813 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1814 dual_dofs = size_of_constraint-primal_dofs; 1815 1816 if (pcbddc->dbg_flag) { 1817 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1818 } 1819 1820 if (primal_dofs > 1) { /* QR */ 1821 1822 /* copy quadrature constraints for change of basis check */ 1823 if (pcbddc->dbg_flag) { 1824 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1825 } 1826 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1827 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1828 1829 /* compute QR decomposition of constraints */ 1830 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1831 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1832 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1833 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1834 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1835 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1836 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1837 1838 /* explictly compute R^-T */ 1839 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1840 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1841 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1842 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1843 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1844 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1845 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1846 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1847 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1848 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1849 1850 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1851 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1853 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1854 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1855 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1856 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1857 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1858 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1859 1860 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1861 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1862 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1863 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1864 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1865 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1867 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1868 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1869 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1870 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1871 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1872 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1873 1874 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1875 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1876 /* insert cols for primal dofs */ 1877 for (j=0;j<primal_dofs;j++) { 1878 start_vals = &qr_basis[j*size_of_constraint]; 1879 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1880 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1881 } 1882 /* insert cols for dual dofs */ 1883 for (j=0,k=0;j<dual_dofs;k++) { 1884 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1885 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1886 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1887 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1888 j++; 1889 } 1890 } 1891 1892 /* check change of basis */ 1893 if (pcbddc->dbg_flag) { 1894 PetscInt ii,jj; 1895 PetscBool valid_qr=PETSC_TRUE; 1896 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1897 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1898 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1899 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1901 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1902 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1903 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1904 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1905 for (jj=0;jj<size_of_constraint;jj++) { 1906 for (ii=0;ii<primal_dofs;ii++) { 1907 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1908 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1909 } 1910 } 1911 if (!valid_qr) { 1912 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1913 for (jj=0;jj<size_of_constraint;jj++) { 1914 for (ii=0;ii<primal_dofs;ii++) { 1915 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1916 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1917 } 1918 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1919 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1920 } 1921 } 1922 } 1923 } else { 1924 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1925 } 1926 } 1927 } else { /* simple transformation block */ 1928 PetscInt row,col; 1929 PetscScalar val; 1930 for (j=0;j<size_of_constraint;j++) { 1931 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1932 if (!PetscBTLookup(is_primal,row)) { 1933 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1934 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1935 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1936 } else { 1937 for (k=0;k<size_of_constraint;k++) { 1938 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1939 if (row != col) { 1940 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1941 } else { 1942 val = 1.0; 1943 } 1944 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1945 } 1946 } 1947 } 1948 } 1949 /* increment primal counter */ 1950 primal_counter += primal_dofs; 1951 } else { 1952 if (pcbddc->dbg_flag) { 1953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1954 } 1955 } 1956 /* increment constraint counter total_counts */ 1957 total_counts += primal_dofs; 1958 } 1959 1960 /* free workspace */ 1961 if (qr_needed) { 1962 if (pcbddc->dbg_flag) { 1963 ierr = PetscFree(work);CHKERRQ(ierr); 1964 } 1965 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1966 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1967 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1968 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1969 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1970 } 1971 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1972 /* assembling */ 1973 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1974 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1975 /* 1976 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1977 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1978 */ 1979 } 1980 1981 /* flush dbg viewer */ 1982 if (pcbddc->dbg_flag) { 1983 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1984 } 1985 1986 /* free workspace */ 1987 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1988 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 1989 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1990 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1991 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1992 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1993 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1994 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1995 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1996 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2002 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2003 { 2004 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2005 PC_IS *pcis = (PC_IS*)pc->data; 2006 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2007 PetscInt bs,ierr,i,vertex_size; 2008 PetscViewer viewer=pcbddc->dbg_viewer; 2009 2010 PetscFunctionBegin; 2011 /* Reset previously computed graph */ 2012 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2013 /* Init local Graph struct */ 2014 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2015 2016 /* Check validity of the csr graph passed in by the user */ 2017 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2018 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2019 } 2020 2021 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2022 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2023 Mat mat_adj; 2024 const PetscInt *xadj,*adjncy; 2025 PetscBool flg_row=PETSC_TRUE; 2026 2027 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2028 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2029 if (!flg_row) { 2030 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2031 } 2032 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2033 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2034 if (!flg_row) { 2035 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2036 } 2037 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2038 } 2039 2040 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2041 vertex_size = 1; 2042 if (!pcbddc->user_provided_isfordofs) { 2043 if (!pcbddc->n_ISForDofs) { 2044 IS *custom_ISForDofs; 2045 2046 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2047 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2048 for (i=0;i<bs;i++) { 2049 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2050 } 2051 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2052 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2053 /* remove my references to IS objects */ 2054 for (i=0;i<bs;i++) { 2055 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2056 } 2057 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2058 } 2059 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2060 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2061 } 2062 2063 /* Setup of Graph */ 2064 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2065 2066 /* Graph's connected components analysis */ 2067 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2068 2069 /* print some info to stdout */ 2070 if (pcbddc->dbg_flag) { 2071 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2078 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2079 { 2080 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2081 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2082 PetscErrorCode ierr; 2083 2084 PetscFunctionBegin; 2085 n = 0; 2086 vertices = 0; 2087 if (pcbddc->ConstraintMatrix) { 2088 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2089 for (i=0;i<local_primal_size;i++) { 2090 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2091 if (size_of_constraint == 1) n++; 2092 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2093 } 2094 if (vertices_idx) { 2095 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2096 n = 0; 2097 for (i=0;i<local_primal_size;i++) { 2098 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2099 if (size_of_constraint == 1) { 2100 vertices[n++]=row_cmat_indices[0]; 2101 } 2102 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2103 } 2104 } 2105 } 2106 *n_vertices = n; 2107 if (vertices_idx) *vertices_idx = vertices; 2108 PetscFunctionReturn(0); 2109 } 2110 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2113 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2114 { 2115 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2116 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2117 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2118 PetscBT touched; 2119 PetscErrorCode ierr; 2120 2121 /* This function assumes that the number of local constraints per connected component 2122 is not greater than the number of nodes defined for the connected component 2123 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2124 PetscFunctionBegin; 2125 n = 0; 2126 constraints_index = 0; 2127 if (pcbddc->ConstraintMatrix) { 2128 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2129 max_size_of_constraint = 0; 2130 for (i=0;i<local_primal_size;i++) { 2131 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2132 if (size_of_constraint > 1) { 2133 n++; 2134 } 2135 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2136 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2137 } 2138 if (constraints_idx) { 2139 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2140 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2141 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2142 n = 0; 2143 for (i=0;i<local_primal_size;i++) { 2144 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2145 if (size_of_constraint > 1) { 2146 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2147 /* find first untouched local node */ 2148 j = 0; 2149 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2150 min_index = row_cmat_global_indices[j]; 2151 min_loc = j; 2152 /* search the minimum among nodes not yet touched on the connected component 2153 since there can be more than one constraint on a single cc */ 2154 for (j=1;j<size_of_constraint;j++) { 2155 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2156 min_index = row_cmat_global_indices[j]; 2157 min_loc = j; 2158 } 2159 } 2160 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2161 constraints_index[n++] = row_cmat_indices[min_loc]; 2162 } 2163 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2164 } 2165 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2166 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2167 } 2168 } 2169 *n_constraints = n; 2170 if (constraints_idx) *constraints_idx = constraints_index; 2171 PetscFunctionReturn(0); 2172 } 2173 2174 /* the next two functions has been adapted from pcis.c */ 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "PCBDDCApplySchur" 2177 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2178 { 2179 PetscErrorCode ierr; 2180 PC_IS *pcis = (PC_IS*)(pc->data); 2181 2182 PetscFunctionBegin; 2183 if (!vec2_B) { vec2_B = v; } 2184 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2185 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2186 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2187 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2188 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2189 PetscFunctionReturn(0); 2190 } 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2194 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2195 { 2196 PetscErrorCode ierr; 2197 PC_IS *pcis = (PC_IS*)(pc->data); 2198 2199 PetscFunctionBegin; 2200 if (!vec2_B) { vec2_B = v; } 2201 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2202 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2203 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2204 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2205 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2206 PetscFunctionReturn(0); 2207 } 2208 2209 #undef __FUNCT__ 2210 #define __FUNCT__ "PCBDDCSubsetNumbering" 2211 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2212 { 2213 Vec local_vec,global_vec; 2214 IS seqis,paris; 2215 VecScatter scatter_ctx; 2216 PetscScalar *array; 2217 PetscInt *temp_global_dofs; 2218 PetscScalar globalsum; 2219 PetscInt i,j,s; 2220 PetscInt nlocals,first_index,old_index,max_local; 2221 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2222 PetscMPIInt *dof_sizes,*dof_displs; 2223 PetscBool first_found; 2224 PetscErrorCode ierr; 2225 2226 PetscFunctionBegin; 2227 /* mpi buffers */ 2228 MPI_Comm_size(comm,&size_prec_comm); 2229 MPI_Comm_rank(comm,&rank_prec_comm); 2230 j = ( !rank_prec_comm ? size_prec_comm : 0); 2231 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2232 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2233 /* get maximum size of subset */ 2234 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2235 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2236 max_local = 0; 2237 if (n_local_dofs) { 2238 max_local = temp_global_dofs[0]; 2239 for (i=1;i<n_local_dofs;i++) { 2240 if (max_local < temp_global_dofs[i] ) { 2241 max_local = temp_global_dofs[i]; 2242 } 2243 } 2244 } 2245 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2246 max_global++; 2247 max_local = 0; 2248 if (n_local_dofs) { 2249 max_local = local_dofs[0]; 2250 for (i=1;i<n_local_dofs;i++) { 2251 if (max_local < local_dofs[i] ) { 2252 max_local = local_dofs[i]; 2253 } 2254 } 2255 } 2256 max_local++; 2257 /* allocate workspace */ 2258 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2259 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2260 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2261 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2262 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2263 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2264 /* create scatter */ 2265 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2266 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2267 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2268 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2269 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2270 /* init array */ 2271 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2272 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2273 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2274 if (local_dofs_mult) { 2275 for (i=0;i<n_local_dofs;i++) { 2276 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2277 } 2278 } else { 2279 for (i=0;i<n_local_dofs;i++) { 2280 array[local_dofs[i]]=1.0; 2281 } 2282 } 2283 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2284 /* scatter into global vec and get total number of global dofs */ 2285 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2286 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2287 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2288 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2289 /* Fill global_vec with cumulative function for global numbering */ 2290 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2291 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2292 nlocals = 0; 2293 first_index = -1; 2294 first_found = PETSC_FALSE; 2295 for (i=0;i<s;i++) { 2296 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2297 first_found = PETSC_TRUE; 2298 first_index = i; 2299 } 2300 nlocals += (PetscInt)PetscRealPart(array[i]); 2301 } 2302 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2303 if (!rank_prec_comm) { 2304 dof_displs[0]=0; 2305 for (i=1;i<size_prec_comm;i++) { 2306 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2307 } 2308 } 2309 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2310 if (first_found) { 2311 array[first_index] += (PetscScalar)nlocals; 2312 old_index = first_index; 2313 for (i=first_index+1;i<s;i++) { 2314 if (PetscRealPart(array[i]) > 0.0) { 2315 array[i] += array[old_index]; 2316 old_index = i; 2317 } 2318 } 2319 } 2320 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2321 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2322 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2323 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2324 /* get global ordering of local dofs */ 2325 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2326 if (local_dofs_mult) { 2327 for (i=0;i<n_local_dofs;i++) { 2328 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2329 } 2330 } else { 2331 for (i=0;i<n_local_dofs;i++) { 2332 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2333 } 2334 } 2335 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2336 /* free workspace */ 2337 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2338 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2339 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2340 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2341 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2342 /* return pointer to global ordering of local dofs */ 2343 *global_numbering_subset = temp_global_dofs; 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2349 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2350 { 2351 PetscInt i,j; 2352 PetscScalar *alphas; 2353 PetscErrorCode ierr; 2354 2355 PetscFunctionBegin; 2356 /* this implements stabilized Gram-Schmidt */ 2357 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2358 for (i=0;i<n;i++) { 2359 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2360 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2361 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2362 } 2363 ierr = PetscFree(alphas);CHKERRQ(ierr); 2364 PetscFunctionReturn(0); 2365 } 2366 2367 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatConvert_IS_AIJ" 2370 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2371 { 2372 Mat new_mat; 2373 Mat_IS *matis = (Mat_IS*)(mat->data); 2374 /* info on mat */ 2375 PetscInt bs,rows,cols; 2376 PetscInt lrows,lcols; 2377 PetscInt local_rows,local_cols; 2378 PetscMPIInt nsubdomains; 2379 /* preallocation */ 2380 Vec vec_dnz,vec_onz; 2381 PetscScalar *my_dnz,*my_onz,*array; 2382 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2383 PetscInt index_row,index_col,owner; 2384 PetscInt *local_indices,*global_indices; 2385 /* work */ 2386 PetscInt i,j; 2387 PetscErrorCode ierr; 2388 2389 PetscFunctionBegin; 2390 /* CHECKS */ 2391 /* get info from mat */ 2392 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2393 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2394 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2395 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2396 2397 /* MAT_INITIAL_MATRIX */ 2398 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2399 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2400 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2401 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2402 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2403 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2404 2405 /* 2406 preallocation 2407 */ 2408 2409 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2410 /* 2411 Some vectors are needed to sum up properly on shared interface dofs. 2412 Note that preallocation is not exact, since it overestimates nonzeros 2413 */ 2414 /* 2415 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2416 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2417 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2418 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2419 */ 2420 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2421 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2422 /* All processes need to compute entire row ownership */ 2423 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2424 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2425 for (i=0;i<nsubdomains;i++) { 2426 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2427 row_ownership[j]=i; 2428 } 2429 } 2430 /* map indices of local mat to global values */ 2431 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2432 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2433 for (i=0;i<local_rows;i++) local_indices[i]=i; 2434 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2435 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2436 2437 /* 2438 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2439 then, they will be summed up properly. This way, preallocation is always sufficient 2440 */ 2441 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2442 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2443 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2444 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2445 for (i=0;i<local_rows;i++) { 2446 index_row = global_indices[i]; 2447 for (j=i;j<local_rows;j++) { 2448 owner = row_ownership[index_row]; 2449 index_col = global_indices[j]; 2450 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2451 my_dnz[i] += 1.0; 2452 } else { /* offdiag block */ 2453 my_onz[i] += 1.0; 2454 } 2455 /* same as before, interchanging rows and cols */ 2456 if (i != j) { 2457 owner = row_ownership[index_col]; 2458 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2459 my_dnz[j] += 1.0; 2460 } else { 2461 my_onz[j] += 1.0; 2462 } 2463 } 2464 } 2465 } 2466 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2467 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2468 if (local_rows) { /* multilevel guard */ 2469 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2470 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2471 } 2472 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2473 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2474 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2475 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2476 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2477 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2478 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2479 2480 /* set computed preallocation in dnz and onz */ 2481 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2482 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2483 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2484 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2485 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2486 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2487 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2488 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2489 2490 /* Resize preallocation if overestimated */ 2491 for (i=0;i<lrows;i++) { 2492 dnz[i] = PetscMin(dnz[i],lcols); 2493 onz[i] = PetscMin(onz[i],cols-lcols); 2494 } 2495 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2496 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2497 2498 /* 2499 Set values. Very Basic. 2500 */ 2501 for (i=0;i<local_rows;i++) { 2502 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2503 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2504 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2505 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2506 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2507 } 2508 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2509 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2510 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2511 2512 /* get back mat */ 2513 *M = new_mat; 2514 PetscFunctionReturn(0); 2515 } 2516 2517 #undef __FUNCT__ 2518 #define __FUNCT__ "MatISSubassemble_Private" 2519 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2520 { 2521 Mat subdomain_adj; 2522 IS new_ranks,ranks_send_to; 2523 MatPartitioning partitioner; 2524 Mat_IS *matis; 2525 PetscInt n_neighs,*neighs,*n_shared,**shared; 2526 PetscInt prank; 2527 PetscMPIInt size,rank,color; 2528 PetscInt *xadj,*adjncy,*oldranks; 2529 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2530 PetscInt i,j,n_subdomains,local_size,threshold=0; 2531 PetscErrorCode ierr; 2532 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2533 PetscSubcomm subcomm; 2534 2535 PetscFunctionBegin; 2536 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2537 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2538 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2539 2540 /* Get info on mapping */ 2541 matis = (Mat_IS*)(mat->data); 2542 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2543 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2544 2545 /* build local CSR graph of subdomains' connectivity */ 2546 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2547 xadj[0] = 0; 2548 xadj[1] = PetscMax(n_neighs-1,0); 2549 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2550 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2551 2552 if (threshold) { 2553 PetscInt* count,min_threshold; 2554 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2555 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2556 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2557 for (j=0;j<n_shared[i];j++) { 2558 count[shared[i][j]] += 1; 2559 } 2560 } 2561 /* adapt threshold since we dont want to lose significant connections */ 2562 min_threshold = n_neighs; 2563 for (i=1;i<n_neighs;i++) { 2564 for (j=0;j<n_shared[i];j++) { 2565 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2566 } 2567 } 2568 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2569 threshold = PetscMax(min_threshold+1,threshold); 2570 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2571 xadj[1] = 0; 2572 for (i=1;i<n_neighs;i++) { 2573 for (j=0;j<n_shared[i];j++) { 2574 if (count[shared[i][j]] < threshold) { 2575 adjncy[xadj[1]] = neighs[i]; 2576 adjncy_wgt[xadj[1]] = n_shared[i]; 2577 xadj[1]++; 2578 break; 2579 } 2580 } 2581 } 2582 ierr = PetscFree(count);CHKERRQ(ierr); 2583 } else { 2584 if (xadj[1]) { 2585 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2586 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2587 } 2588 } 2589 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2590 if (use_square) { 2591 for (i=0;i<xadj[1];i++) { 2592 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2593 } 2594 } 2595 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2596 2597 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2598 2599 /* 2600 Restrict work on active processes only. 2601 */ 2602 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2603 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2604 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2605 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2606 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2607 if (color) { 2608 ierr = PetscFree(xadj);CHKERRQ(ierr); 2609 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2610 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2611 } else { 2612 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2613 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2614 prank = rank; 2615 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2616 for (i=0;i<size;i++) { 2617 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2618 } 2619 for (i=0;i<xadj[1];i++) { 2620 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2621 } 2622 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2623 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2624 n_subdomains = (PetscInt)size; 2625 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2626 2627 /* Partition */ 2628 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2629 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2630 if (use_vwgt) { 2631 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2632 v_wgt[0] = local_size; 2633 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2634 } 2635 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2636 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2637 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2638 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2639 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2640 2641 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2642 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2643 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2644 /* clean up */ 2645 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2646 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2647 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2648 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2649 } 2650 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2651 2652 /* assemble parallel IS for sends */ 2653 i = 1; 2654 if (color) i=0; 2655 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2656 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2657 2658 /* get back IS */ 2659 *is_sends = ranks_send_to; 2660 PetscFunctionReturn(0); 2661 } 2662 2663 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2664 2665 #undef __FUNCT__ 2666 #define __FUNCT__ "MatISSubassemble" 2667 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2668 { 2669 Mat local_mat,new_mat; 2670 Mat_IS *matis; 2671 IS is_sends_internal; 2672 PetscInt rows,cols; 2673 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2674 PetscBool ismatis,isdense; 2675 ISLocalToGlobalMapping l2gmap; 2676 PetscInt* l2gmap_indices; 2677 const PetscInt* is_indices; 2678 MatType new_local_type; 2679 MatTypePrivate new_local_type_private; 2680 /* buffers */ 2681 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2682 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2683 /* MPI */ 2684 MPI_Comm comm; 2685 PetscMPIInt n_sends,n_recvs,commsize; 2686 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2687 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2688 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2689 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2690 PetscErrorCode ierr; 2691 2692 PetscFunctionBegin; 2693 /* checks */ 2694 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2695 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2696 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2697 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2698 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2699 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2700 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2701 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2702 PetscValidLogicalCollectiveInt(mat,bs,0); 2703 /* prepare IS for sending if not provided */ 2704 if (!is_sends) { 2705 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2706 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2707 } else { 2708 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2709 is_sends_internal = is_sends; 2710 } 2711 2712 /* get pointer of MATIS data */ 2713 matis = (Mat_IS*)mat->data; 2714 2715 /* get comm */ 2716 comm = PetscObjectComm((PetscObject)mat); 2717 2718 /* compute number of sends */ 2719 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2720 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2721 2722 /* compute number of receives */ 2723 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2724 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2725 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2726 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2727 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2728 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2729 ierr = PetscFree(iflags);CHKERRQ(ierr); 2730 2731 /* prepare send/receive buffers */ 2732 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2733 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2734 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2735 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2736 2737 /* Get data from local mat */ 2738 if (!isdense) { 2739 /* TODO: See below some guidelines on how to prepare the local buffers */ 2740 /* 2741 send_buffer_vals should contain the raw values of the local matrix 2742 send_buffer_idxs should contain: 2743 - MatType_PRIVATE type 2744 - PetscInt size_of_l2gmap 2745 - PetscInt global_row_indices[size_of_l2gmap] 2746 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2747 */ 2748 } else { 2749 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2750 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2751 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2752 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2753 send_buffer_idxs[1] = i; 2754 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2755 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2756 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2757 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2758 for (i=0;i<n_sends;i++) { 2759 ilengths_vals[is_indices[i]] = len*len; 2760 ilengths_idxs[is_indices[i]] = len+2; 2761 } 2762 } 2763 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2764 buf_size_idxs = 0; 2765 buf_size_vals = 0; 2766 for (i=0;i<n_recvs;i++) { 2767 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2768 buf_size_vals += (PetscInt)olengths_vals[i]; 2769 } 2770 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2771 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2772 2773 /* get new tags for clean communications */ 2774 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2775 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2776 2777 /* allocate for requests */ 2778 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2779 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2780 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2781 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2782 2783 /* communications */ 2784 ptr_idxs = recv_buffer_idxs; 2785 ptr_vals = recv_buffer_vals; 2786 for (i=0;i<n_recvs;i++) { 2787 source_dest = onodes[i]; 2788 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2789 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2790 ptr_idxs += olengths_idxs[i]; 2791 ptr_vals += olengths_vals[i]; 2792 } 2793 for (i=0;i<n_sends;i++) { 2794 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2795 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2796 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2797 } 2798 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2799 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2800 2801 /* assemble new l2g map */ 2802 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2803 ptr_idxs = recv_buffer_idxs; 2804 buf_size_idxs = 0; 2805 for (i=0;i<n_recvs;i++) { 2806 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2807 ptr_idxs += olengths_idxs[i]; 2808 } 2809 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2810 ptr_idxs = recv_buffer_idxs; 2811 buf_size_idxs = 0; 2812 for (i=0;i<n_recvs;i++) { 2813 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2814 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2815 ptr_idxs += olengths_idxs[i]; 2816 } 2817 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2818 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2819 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2820 2821 /* infer new local matrix type from received local matrices type */ 2822 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2823 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2824 new_local_type_private = MATAIJ_PRIVATE; 2825 new_local_type = MATSEQAIJ; 2826 if (n_recvs) { 2827 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2828 ptr_idxs = recv_buffer_idxs; 2829 for (i=0;i<n_recvs;i++) { 2830 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2831 new_local_type_private = MATAIJ_PRIVATE; 2832 break; 2833 } 2834 ptr_idxs += olengths_idxs[i]; 2835 } 2836 switch (new_local_type_private) { 2837 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2838 new_local_type = MATSEQAIJ; 2839 bs = 1; 2840 break; 2841 case MATAIJ_PRIVATE: 2842 new_local_type = MATSEQAIJ; 2843 bs = 1; 2844 break; 2845 case MATBAIJ_PRIVATE: 2846 new_local_type = MATSEQBAIJ; 2847 break; 2848 case MATSBAIJ_PRIVATE: 2849 new_local_type = MATSEQSBAIJ; 2850 break; 2851 default: 2852 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2853 break; 2854 } 2855 } 2856 2857 /* create MATIS object */ 2858 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2859 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2860 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2861 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2862 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2863 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2864 2865 /* set values */ 2866 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2867 ptr_vals = recv_buffer_vals; 2868 ptr_idxs = recv_buffer_idxs; 2869 for (i=0;i<n_recvs;i++) { 2870 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2871 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2872 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2873 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2874 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2875 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2876 } 2877 ptr_idxs += olengths_idxs[i]; 2878 ptr_vals += olengths_vals[i]; 2879 } 2880 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2881 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2882 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2883 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2884 2885 { /* check */ 2886 Vec lvec,rvec; 2887 PetscReal infty_error; 2888 2889 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2890 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2891 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2892 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2893 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2894 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2895 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2896 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2897 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2898 } 2899 2900 /* free workspace */ 2901 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2902 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2903 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2904 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2905 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2906 if (isdense) { 2907 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2908 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2909 } else { 2910 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2911 } 2912 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2913 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2914 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2915 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2916 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2917 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2918 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2919 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2920 ierr = PetscFree(onodes);CHKERRQ(ierr); 2921 /* get back new mat */ 2922 *mat_n = new_mat; 2923 PetscFunctionReturn(0); 2924 } 2925 2926 #undef __FUNCT__ 2927 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2928 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2929 { 2930 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2931 PC_IS *pcis = (PC_IS*)pc->data; 2932 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2933 MatNullSpace CoarseNullSpace=NULL; 2934 ISLocalToGlobalMapping coarse_islg; 2935 IS coarse_is; 2936 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2937 PetscInt im_active=-1,active_procs=-1; 2938 PC pc_temp; 2939 PCType coarse_pc_type; 2940 KSPType coarse_ksp_type; 2941 PetscBool multilevel_requested,multilevel_allowed; 2942 PetscBool setsym,issym,isbddc,isnn; 2943 MatStructure matstruct; 2944 PetscErrorCode ierr; 2945 2946 PetscFunctionBegin; 2947 /* Assign global numbering to coarse dofs */ 2948 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2949 2950 /* infer some info from user */ 2951 issym = PETSC_FALSE; 2952 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2953 multilevel_allowed = PETSC_FALSE; 2954 multilevel_requested = PETSC_FALSE; 2955 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2956 if (multilevel_requested) { 2957 /* count "active processes" */ 2958 im_active = !!(pcis->n); 2959 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2960 if (active_procs/pcbddc->coarsening_ratio < 2) { 2961 multilevel_allowed = PETSC_FALSE; 2962 } else { 2963 multilevel_allowed = PETSC_TRUE; 2964 } 2965 } 2966 2967 /* set defaults for coarse KSP and PC */ 2968 if (multilevel_allowed) { 2969 if (issym) { 2970 coarse_ksp_type = KSPRICHARDSON; 2971 } else { 2972 coarse_ksp_type = KSPCHEBYSHEV; 2973 } 2974 coarse_pc_type = PCBDDC; 2975 } else { 2976 coarse_ksp_type = KSPPREONLY; 2977 coarse_pc_type = PCREDUNDANT; 2978 } 2979 2980 /* create the coarse KSP object only once with defaults */ 2981 if (!pcbddc->coarse_ksp) { 2982 char prefix[256],str_level[3]; 2983 size_t len; 2984 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 2985 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2986 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 2987 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2988 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2989 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2990 /* prefix */ 2991 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 2992 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2993 if (!pcbddc->current_level) { 2994 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2995 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 2996 } else { 2997 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2998 if (pcbddc->current_level>1) len -= 2; 2999 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3000 *(prefix+len)='\0'; 3001 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3002 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3003 } 3004 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3005 } 3006 /* allow user customization */ 3007 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3008 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3009 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3010 3011 /* get some info after set from options */ 3012 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3013 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3014 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3015 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3016 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3017 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3018 isbddc = PETSC_FALSE; 3019 } 3020 3021 /* propagate BDDC info to the next level */ 3022 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3023 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3024 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3025 3026 /* creates temporary MATIS object for coarse matrix */ 3027 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3028 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3029 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3030 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3031 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3032 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3033 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3034 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3035 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3036 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3037 3038 /* assemble coarse matrix */ 3039 if (isbddc || isnn) { 3040 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3041 } else { 3042 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3043 } 3044 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3045 3046 /* create local to global scatters for coarse problem */ 3047 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3048 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3049 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3050 3051 /* propagate symmetry info to coarse matrix */ 3052 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3053 3054 /* Compute coarse null space (special handling by BDDC only) */ 3055 if (pcbddc->NullSpace) { 3056 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3057 if (isbddc) { 3058 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3059 } else { 3060 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3061 } 3062 } 3063 3064 /* set operators */ 3065 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3066 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3067 3068 /* additional KSP customization */ 3069 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3070 if (max_it < 5) { 3071 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3072 } 3073 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3074 3075 3076 /* print some info if requested */ 3077 if (pcbddc->dbg_flag) { 3078 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3079 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3080 if (!multilevel_allowed) { 3081 if (multilevel_requested) { 3082 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3083 } else if (pcbddc->max_levels) { 3084 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3085 } 3086 } 3087 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3088 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3089 } 3090 3091 /* setup coarse ksp */ 3092 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3093 if (pcbddc->dbg_flag) { 3094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3095 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3096 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3098 } 3099 3100 /* Check coarse problem if requested */ 3101 if (pcbddc->dbg_flag) { 3102 KSP check_ksp; 3103 KSPType check_ksp_type; 3104 PC check_pc; 3105 Vec check_vec; 3106 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3107 PetscInt its; 3108 PetscBool ispreonly,compute; 3109 3110 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3111 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3112 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3113 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3114 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3115 if (ispreonly) { 3116 check_ksp_type = KSPPREONLY; 3117 compute = PETSC_FALSE; 3118 } else { 3119 if (issym) check_ksp_type = KSPCG; 3120 else check_ksp_type = KSPGMRES; 3121 compute = PETSC_TRUE; 3122 } 3123 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3124 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3125 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3126 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3127 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3128 /* create random vec */ 3129 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3130 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3131 if (CoarseNullSpace) { 3132 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3133 } 3134 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3135 /* solve coarse problem */ 3136 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3137 if (CoarseNullSpace) { 3138 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3139 } 3140 /* check coarse problem residual error */ 3141 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3142 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3143 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3144 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3145 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3146 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3147 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3148 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3149 /* get eigenvalue estimation if preonly has not been requested */ 3150 if (!ispreonly) { 3151 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3152 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3154 } 3155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3156 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3157 } 3158 /* free memory */ 3159 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3160 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3161 PetscFunctionReturn(0); 3162 } 3163 3164 #undef __FUNCT__ 3165 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3166 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3167 { 3168 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3169 PC_IS* pcis = (PC_IS*)pc->data; 3170 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3171 PetscInt i,j,coarse_size; 3172 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3173 PetscErrorCode ierr; 3174 3175 PetscFunctionBegin; 3176 /* get indices in local ordering for vertices and constraints */ 3177 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3178 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3179 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3180 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3181 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3182 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3183 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3184 3185 /* Compute global number of coarse dofs */ 3186 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3187 3188 /* check numbering */ 3189 if (pcbddc->dbg_flag) { 3190 PetscScalar coarsesum,*array; 3191 3192 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3193 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3194 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3195 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3196 for (i=0;i<pcbddc->local_primal_size;i++) { 3197 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3198 } 3199 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3200 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3201 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3202 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3203 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3204 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3205 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3206 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3207 for (i=0;i<pcis->n;i++) { 3208 if (array[i] == 1.0) { 3209 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3210 } 3211 } 3212 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3213 for (i=0;i<pcis->n;i++) { 3214 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3215 } 3216 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3217 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3218 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3219 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3220 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3221 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3222 if (pcbddc->dbg_flag > 1) { 3223 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3224 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3225 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3226 for (i=0;i<pcbddc->local_primal_size;i++) { 3227 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3228 } 3229 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3230 } 3231 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3232 } 3233 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3234 /* get back data */ 3235 *coarse_size_n = coarse_size; 3236 *local_primal_indices_n = local_primal_indices; 3237 PetscFunctionReturn(0); 3238 } 3239 3240