1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 18 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 23 24 /* Change global null space passed in by the user if change of basis has been requested */ 25 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 26 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 27 } 28 29 /* 30 Setup local correction and local part of coarse basis. 31 Gives back the dense local part of the coarse matrix in column major ordering 32 */ 33 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 34 35 /* Compute total number of coarse nodes and setup coarse solver */ 36 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 37 38 /* free */ 39 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 40 PetscFunctionReturn(0); 41 } 42 43 #undef __FUNCT__ 44 #define __FUNCT__ "PCBDDCResetCustomization" 45 PetscErrorCode PCBDDCResetCustomization(PC pc) 46 { 47 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 48 PetscInt i; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 57 for (i=0;i<pcbddc->n_ISForDofs;i++) { 58 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 59 } 60 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 61 pcbddc->n_ISForDofs = 0; 62 PetscFunctionReturn(0); 63 } 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "PCBDDCResetTopography" 67 PetscErrorCode PCBDDCResetTopography(PC pc) 68 { 69 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 74 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 75 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 79 #undef __FUNCT__ 80 #define __FUNCT__ "PCBDDCResetSolvers" 81 PetscErrorCode PCBDDCResetSolvers(PC pc) 82 { 83 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 84 PetscErrorCode ierr; 85 86 PetscFunctionBegin; 87 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 88 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 89 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 93 /* 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 */ 97 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 99 /* 100 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 102 */ 103 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCSetUpWorkVectors" 112 PetscErrorCode PCBDDCSetUpWorkVectors(PC pc) 113 { 114 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 115 PC_IS *pcis = (PC_IS*)pc->data; 116 VecType impVecType; 117 PetscInt n_vertices,n_constraints,local_primal_size,n_R,old_size; 118 PetscErrorCode ierr; 119 120 PetscFunctionBegin; 121 /* get sizes */ 122 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 123 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,NULL);CHKERRQ(ierr); 124 n_constraints = local_primal_size - n_vertices; 125 n_R = pcis->n-n_vertices; 126 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 127 /* local work vectors (try to avoid unneeded work)*/ 128 /* R nodes */ 129 old_size = -1; 130 if (pcbddc->vec1_R) { 131 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 132 } 133 if (n_R != old_size) { 134 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 135 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 136 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 137 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 138 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 139 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 140 } 141 /* local primal dofs */ 142 old_size = -1; 143 if (pcbddc->vec1_P) { 144 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 145 } 146 if (local_primal_size != old_size) { 147 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 148 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 149 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 150 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 151 } 152 /* local explicit constraints */ 153 old_size = -1; 154 if (pcbddc->vec1_C) { 155 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 156 } 157 if (n_constraints && n_constraints != old_size) { 158 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 159 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 160 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 161 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 162 } 163 PetscFunctionReturn(0); 164 } 165 166 #undef __FUNCT__ 167 #define __FUNCT__ "PCBDDCSetUpCorrection" 168 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 169 { 170 PetscErrorCode ierr; 171 /* pointers to pcis and pcbddc */ 172 PC_IS* pcis = (PC_IS*)pc->data; 173 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 174 /* submatrices of local problem */ 175 Mat A_RV,A_VR,A_VV; 176 /* working matrices */ 177 Mat M1,M2,M3,C_CR; 178 /* working vectors */ 179 Vec vec1_C,vec2_C,vec1_V,vec2_V; 180 /* additional working stuff */ 181 IS is_aux; 182 ISLocalToGlobalMapping BtoNmap; 183 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 184 const PetscScalar *array,*row_cmat_values; 185 const PetscInt *row_cmat_indices,*idx_R_local; 186 PetscInt *vertices,*idx_V_B,*auxindices; 187 PetscInt n_vertices,n_constraints,size_of_constraint; 188 PetscInt i,j,n_R,n_D,n_B; 189 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 190 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 191 MatType impMatType; 192 /* some shortcuts to scalars */ 193 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 194 /* for debugging purposes */ 195 PetscReal *coarsefunctions_errors,*constraints_errors; 196 197 PetscFunctionBegin; 198 /* get number of vertices and their local indices */ 199 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 200 n_constraints = pcbddc->local_primal_size-n_vertices; 201 /* Set Non-overlapping dimensions */ 202 n_B = pcis->n_B; n_D = pcis->n - n_B; 203 n_R = pcis->n-n_vertices; 204 205 /* Set types for local objects needed by BDDC precondtioner */ 206 impMatType = MATSEQDENSE; 207 208 /* Allocating some extra storage just to be safe */ 209 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 210 for (i=0;i<pcis->n;i++) auxindices[i]=i; 211 212 /* vertices in boundary numbering */ 213 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 214 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 215 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 216 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 217 if (i != n_vertices) { 218 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 219 } 220 221 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 222 if (n_constraints) { 223 /* work vectors */ 224 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 225 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 226 /* auxiliary matrices */ 227 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 228 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 229 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 230 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 231 232 /* Extract constraints on R nodes: C_{CR} */ 233 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 234 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 235 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 236 237 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 238 for (i=0;i<n_constraints;i++) { 239 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 240 /* Get row of constraint matrix in R numbering */ 241 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 242 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 243 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 244 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 245 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 246 /* Solve for row of constraint matrix in R numbering */ 247 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 248 /* Set values in local_auxmat2 */ 249 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 250 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 251 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 252 } 253 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 254 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 255 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 256 257 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 258 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 259 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 260 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 261 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 262 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 263 ierr = MatSetUp(M1);CHKERRQ(ierr); 264 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 265 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 266 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 267 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 268 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 269 ierr = MatDestroy(&M2);CHKERRQ(ierr); 270 ierr = MatDestroy(&M3);CHKERRQ(ierr); 271 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 272 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 273 } 274 275 /* Get submatrices from subdomain matrix */ 276 if (n_vertices) { 277 PetscInt ibs,mbs; 278 PetscBool issbaij; 279 Mat newmat; 280 281 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 282 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 283 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 284 if (ibs != mbs) { /* need to convert to SEQAIJ */ 285 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 286 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 287 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 288 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 289 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 290 } else { 291 /* this is safe */ 292 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 293 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 294 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 295 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 296 /* which of the two approaches is faster? */ 297 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 298 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 299 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 300 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 301 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 302 } else { 303 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 304 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 305 } 306 } 307 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 308 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 309 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 310 } 311 312 /* Matrix of coarse basis functions (local) */ 313 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 314 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 315 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 316 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 317 if (pcbddc->switch_static || pcbddc->dbg_flag) { 318 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 319 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 320 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 321 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 322 } 323 324 if (pcbddc->dbg_flag) { 325 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 326 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 327 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 328 } 329 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 330 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 331 332 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 333 334 /* vertices */ 335 for (i=0;i<n_vertices;i++) { 336 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 337 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 338 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 339 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 340 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 341 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 342 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 343 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 344 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 345 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 346 if (n_constraints) { 347 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 348 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 349 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 350 } 351 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 352 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 353 354 /* Set values in coarse basis function and subdomain part of coarse_mat */ 355 /* coarse basis functions */ 356 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 357 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 358 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 359 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 360 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 361 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 362 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 363 if (pcbddc->switch_static || pcbddc->dbg_flag) { 364 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 365 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 366 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 367 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 368 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 369 } 370 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 371 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 372 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 373 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 374 if (n_constraints) { 375 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 376 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 377 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 378 } 379 380 /* check */ 381 if (pcbddc->dbg_flag) { 382 /* assemble subdomain vector on local nodes */ 383 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 384 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 385 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 386 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 387 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 388 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 389 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 390 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 391 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 392 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 393 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 394 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 395 if (n_constraints) { 396 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 397 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 399 } 400 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 401 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 402 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 403 /* check saddle point solution */ 404 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 405 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 406 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 407 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 408 /* shift by the identity matrix */ 409 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 410 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 411 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 412 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 413 } 414 } 415 416 /* constraints */ 417 for (i=0;i<n_constraints;i++) { 418 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 419 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 420 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 421 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 422 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 423 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 424 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 425 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 426 if (n_vertices) { 427 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 428 } 429 /* Set values in coarse basis function and subdomain part of coarse_mat */ 430 /* coarse basis functions */ 431 j = i+n_vertices; /* don't touch this! */ 432 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 433 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 434 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 435 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 436 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 437 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 438 if (pcbddc->switch_static || pcbddc->dbg_flag) { 439 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 440 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 441 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 442 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 444 } 445 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 446 if (n_vertices) { 447 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 448 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 449 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 450 } 451 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 452 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 453 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 454 455 if (pcbddc->dbg_flag) { 456 /* assemble subdomain vector on nodes */ 457 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 458 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 459 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 460 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 461 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 462 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 463 /* assemble subdomain vector of lagrange multipliers */ 464 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 465 if (n_vertices) { 466 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 467 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 468 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 469 } 470 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 471 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 472 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 473 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 474 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 475 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 476 /* check saddle point solution */ 477 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 478 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 479 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 480 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 481 /* shift by the identity matrix */ 482 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 483 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 484 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 485 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 486 } 487 } 488 /* call assembling routines for local coarse basis */ 489 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 490 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 491 if (pcbddc->switch_static || pcbddc->dbg_flag) { 492 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 493 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 494 } 495 496 /* compute other basis functions for non-symmetric problems */ 497 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 498 if (!setsym || (setsym && !issym)) { 499 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 500 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 501 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 502 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 503 if (pcbddc->switch_static || pcbddc->dbg_flag) { 504 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 505 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 506 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 507 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 508 } 509 for (i=0;i<pcbddc->local_primal_size;i++) { 510 if (n_constraints) { 511 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 512 for (j=0;j<n_constraints;j++) { 513 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 514 } 515 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 516 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 517 } 518 if (i<n_vertices) { 519 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 520 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 521 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 522 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 523 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 524 if (n_constraints) { 525 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 526 } 527 } else { 528 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 529 } 530 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 531 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 532 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 533 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 534 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 535 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 536 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 537 if (i<n_vertices) { 538 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 539 } 540 if (pcbddc->switch_static || pcbddc->dbg_flag) { 541 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 542 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 543 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 544 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 545 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 546 } 547 548 if (pcbddc->dbg_flag) { 549 /* assemble subdomain vector on nodes */ 550 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 551 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 552 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 553 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 554 if (i<n_vertices) { 555 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 556 } 557 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 558 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 559 /* assemble subdomain vector of lagrange multipliers */ 560 for (j=0;j<pcbddc->local_primal_size;j++) { 561 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 562 } 563 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 564 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 565 /* check saddle point solution */ 566 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 567 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 568 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 569 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 570 /* shift by the identity matrix */ 571 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 572 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 573 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 574 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 575 } 576 } 577 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 578 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 579 if (pcbddc->switch_static || pcbddc->dbg_flag) { 580 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 581 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 582 } 583 } 584 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 585 /* Checking coarse_sub_mat and coarse basis functios */ 586 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 587 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 588 if (pcbddc->dbg_flag) { 589 Mat coarse_sub_mat; 590 Mat AUXMAT,TM1,TM2,TM3,TM4; 591 Mat coarse_phi_D,coarse_phi_B; 592 Mat coarse_psi_D,coarse_psi_B; 593 Mat A_II,A_BB,A_IB,A_BI; 594 MatType checkmattype=MATSEQAIJ; 595 PetscReal real_value; 596 597 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 598 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 599 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 600 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 601 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 602 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 603 if (pcbddc->coarse_psi_B) { 604 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 605 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 606 } 607 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 608 609 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 610 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 611 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 612 if (pcbddc->coarse_psi_B) { 613 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 614 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 615 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 616 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 617 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 618 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 619 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 620 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 621 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 622 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 623 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 624 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 625 } else { 626 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 627 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 628 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 629 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 630 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 631 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 632 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 633 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 634 } 635 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 636 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 637 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 638 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 639 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 640 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 641 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 642 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 643 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 644 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 645 for (i=0;i<pcbddc->local_primal_size;i++) { 646 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 647 } 648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 649 for (i=0;i<pcbddc->local_primal_size;i++) { 650 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 651 } 652 if (pcbddc->coarse_psi_B) { 653 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 654 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 655 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 656 } 657 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 658 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 659 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 660 } 661 } 662 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 663 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 664 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 665 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 666 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 667 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 668 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 669 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 670 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 671 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 672 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 673 if (pcbddc->coarse_psi_B) { 674 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 675 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 676 } 677 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 678 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 679 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 680 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 681 } 682 /* free memory */ 683 if (n_vertices) { 684 ierr = PetscFree(vertices);CHKERRQ(ierr); 685 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 686 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 687 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 688 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 689 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 690 } 691 if (n_constraints) { 692 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 693 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 694 ierr = MatDestroy(&M1);CHKERRQ(ierr); 695 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 696 } 697 ierr = PetscFree(auxindices);CHKERRQ(ierr); 698 /* get back data */ 699 *coarse_submat_vals_n = coarse_submat_vals; 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 705 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 706 { 707 PC_IS* pcis = (PC_IS*)(pc->data); 708 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 709 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 710 PetscBool issbaij,isbaij; 711 /* manage repeated solves */ 712 MatReuse reuse; 713 MatStructure matstruct; 714 PetscErrorCode ierr; 715 716 PetscFunctionBegin; 717 /* get mat flags */ 718 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 719 reuse = MAT_INITIAL_MATRIX; 720 if (pc->setupcalled) { 721 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 722 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 723 if (matstruct == SAME_NONZERO_PATTERN) { 724 reuse = MAT_REUSE_MATRIX; 725 } else { 726 reuse = MAT_INITIAL_MATRIX; 727 } 728 } 729 if (reuse == MAT_INITIAL_MATRIX) { 730 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 731 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 732 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 733 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 734 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 735 } 736 737 /* transform local matrices if needed */ 738 if (pcbddc->use_change_of_basis) { 739 Mat change_mat_all; 740 PetscScalar *row_cmat_values; 741 PetscInt *row_cmat_indices; 742 PetscInt *nnz,*is_indices,*temp_indices; 743 PetscInt i,j,k,n_D,n_B; 744 745 /* Get Non-overlapping dimensions */ 746 n_B = pcis->n_B; 747 n_D = pcis->n-n_B; 748 749 /* compute nonzero structure of change of basis on all local nodes */ 750 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 751 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 752 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 753 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 754 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 755 k=1; 756 for (i=0;i<n_B;i++) { 757 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 758 nnz[is_indices[i]]=j; 759 if (k < j) k = j; 760 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 761 } 762 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 763 /* assemble change of basis matrix on the whole set of local dofs */ 764 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 765 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 766 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 767 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 768 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 769 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 770 for (i=0;i<n_D;i++) { 771 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 772 } 773 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 774 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 775 for (i=0;i<n_B;i++) { 776 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 777 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 778 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 779 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 780 } 781 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 782 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 783 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 784 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 785 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 786 if (!issbaij && !isbaij) { 787 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 788 } else { 789 Mat work_mat; 790 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 791 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 792 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 793 } 794 /* 795 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 796 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 797 */ 798 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 799 ierr = PetscFree(nnz);CHKERRQ(ierr); 800 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 801 } else { 802 /* without change of basis, the local matrix is unchanged */ 803 if (!pcbddc->local_mat) { 804 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 805 pcbddc->local_mat = matis->A; 806 } 807 } 808 809 /* get submatrices */ 810 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 811 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 812 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 813 if (!issbaij) { 814 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 815 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 816 } else { 817 Mat newmat; 818 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 819 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 820 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 821 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 822 } 823 PetscFunctionReturn(0); 824 } 825 826 #undef __FUNCT__ 827 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 828 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 829 { 830 PC_IS* pcis = (PC_IS*)(pc->data); 831 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 832 IS is_aux1,is_aux2; 833 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 834 PetscInt n_vertices,i,j,n_R,n_D,n_B; 835 PetscInt vbs,bs; 836 PetscBT bitmask; 837 PetscErrorCode ierr; 838 839 PetscFunctionBegin; 840 /* Set Non-overlapping dimensions */ 841 n_B = pcis->n_B; n_D = pcis->n - n_B; 842 /* get vertex indices from constraint matrix */ 843 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 844 845 /* create auxiliary bitmask */ 846 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 847 for (i=0;i<n_vertices;i++) { 848 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 849 } 850 851 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 852 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 853 for (i=0, n_R=0; i<pcis->n; i++) { 854 if (!PetscBTLookup(bitmask,i)) { 855 idx_R_local[n_R] = i; 856 n_R++; 857 } 858 } 859 860 /* Block code */ 861 vbs = 1; 862 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 863 if (bs>1 && !(n_vertices%bs)) { 864 PetscBool is_blocked = PETSC_TRUE; 865 PetscInt *vary; 866 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 867 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 868 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 869 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 870 for (i=0; i<n_vertices; i++) { 871 if (vary[i]!=0 && vary[i]!=bs) { 872 is_blocked = PETSC_FALSE; 873 break; 874 } 875 } 876 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 877 vbs = bs; 878 for (i=0;i<n_R/vbs;i++) { 879 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 880 } 881 } 882 ierr = PetscFree(vary);CHKERRQ(ierr); 883 } 884 ierr = PetscFree(vertices);CHKERRQ(ierr); 885 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 886 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 887 888 /* print some info if requested */ 889 if (pcbddc->dbg_flag) { 890 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 891 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 892 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 893 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 894 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 895 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 896 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 897 } 898 899 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 900 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 901 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 902 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 903 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 904 for (i=0; i<n_D; i++) { 905 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 906 } 907 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 908 for (i=0, j=0; i<n_R; i++) { 909 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 910 aux_array1[j++] = i; 911 } 912 } 913 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 914 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 915 for (i=0, j=0; i<n_B; i++) { 916 if (!PetscBTLookup(bitmask,is_indices[i])) { 917 aux_array2[j++] = i; 918 } 919 } 920 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 921 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 922 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 923 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 924 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 925 926 if (pcbddc->switch_static || pcbddc->dbg_flag) { 927 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 928 for (i=0, j=0; i<n_R; i++) { 929 if (PetscBTLookup(bitmask,idx_R_local[i])) { 930 aux_array1[j++] = i; 931 } 932 } 933 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 934 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 935 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 936 } 937 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 938 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 939 PetscFunctionReturn(0); 940 } 941 942 943 #undef __FUNCT__ 944 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 945 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 946 { 947 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 948 PC_IS *pcis = (PC_IS*)pc->data; 949 PC pc_temp; 950 Mat A_RR; 951 Vec vec1,vec2,vec3; 952 MatStructure matstruct; 953 PetscScalar m_one = -1.0; 954 PetscReal value; 955 PetscInt n_D,n_R,ibs,mbs; 956 PetscBool use_exact,use_exact_reduced,issbaij; 957 PetscErrorCode ierr; 958 /* prefixes stuff */ 959 char dir_prefix[256],neu_prefix[256],str_level[3]; 960 size_t len; 961 962 PetscFunctionBegin; 963 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 964 965 /* compute prefixes */ 966 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 967 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 968 if (!pcbddc->current_level) { 969 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 970 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 971 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 972 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 973 } else { 974 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 975 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 976 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 977 len -= 15; /* remove "pc_bddc_coarse_" */ 978 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 979 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 980 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 981 *(dir_prefix+len)='\0'; 982 *(neu_prefix+len)='\0'; 983 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 984 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 985 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 986 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 987 } 988 989 /* DIRICHLET PROBLEM */ 990 /* Matrix for Dirichlet problem is pcis->A_II */ 991 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 992 if (!pcbddc->ksp_D) { /* create object if not yet build */ 993 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 994 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 995 /* default */ 996 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 997 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 998 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 999 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1000 if (issbaij) { 1001 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1002 } else { 1003 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1004 } 1005 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1006 } 1007 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1008 /* Allow user's customization */ 1009 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1010 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1011 if (!n_D) { 1012 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1013 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1014 } 1015 /* Set Up KSP for Dirichlet problem of BDDC */ 1016 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1017 /* set ksp_D into pcis data */ 1018 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1019 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1020 pcis->ksp_D = pcbddc->ksp_D; 1021 1022 /* NEUMANN PROBLEM */ 1023 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1024 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1025 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1026 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1027 if (ibs != mbs) { 1028 Mat newmat; 1029 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1030 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1031 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1032 } else { 1033 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1034 } 1035 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1036 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1037 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1038 /* default */ 1039 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1040 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1041 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1042 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1043 if (issbaij) { 1044 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1045 } else { 1046 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1047 } 1048 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1049 } 1050 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1051 /* Allow user's customization */ 1052 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1053 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1054 if (!n_R) { 1055 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1056 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1057 } 1058 /* Set Up KSP for Neumann problem of BDDC */ 1059 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1060 1061 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1062 1063 /* Dirichlet */ 1064 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1065 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1066 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1067 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1068 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1069 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1070 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1071 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1072 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1073 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1074 /* need to be adapted? */ 1075 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1076 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1077 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1078 /* print info */ 1079 if (pcbddc->dbg_flag) { 1080 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1081 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1082 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1083 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1084 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1085 } 1086 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1087 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1088 } 1089 1090 /* Neumann */ 1091 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1092 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1093 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1094 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1095 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1096 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1097 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1098 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1099 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1100 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1101 /* need to be adapted? */ 1102 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1103 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1104 /* print info */ 1105 if (pcbddc->dbg_flag) { 1106 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1107 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1108 } 1109 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1110 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1111 } 1112 /* free Neumann problem's matrix */ 1113 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1119 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1120 { 1121 PetscErrorCode ierr; 1122 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1123 1124 PetscFunctionBegin; 1125 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1126 if (pcbddc->local_auxmat1) { 1127 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1128 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1129 } 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1135 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1136 { 1137 PetscErrorCode ierr; 1138 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1139 PC_IS* pcis = (PC_IS*) (pc->data); 1140 const PetscScalar zero = 0.0; 1141 1142 PetscFunctionBegin; 1143 /* Application of PHI^T (or PSI^T) */ 1144 if (pcbddc->coarse_psi_B) { 1145 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1146 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1147 } else { 1148 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1149 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1150 } 1151 /* Scatter data of coarse_rhs */ 1152 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1153 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1154 1155 /* Local solution on R nodes */ 1156 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1157 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 if (pcbddc->switch_static) { 1160 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1161 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1162 } 1163 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1164 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1165 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1166 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1167 if (pcbddc->switch_static) { 1168 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1170 } 1171 1172 /* Coarse solution */ 1173 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1174 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1175 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1176 } 1177 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1178 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1179 1180 /* Sum contributions from two levels */ 1181 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1182 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1188 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1189 { 1190 PetscErrorCode ierr; 1191 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1192 1193 PetscFunctionBegin; 1194 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1200 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1201 { 1202 PetscErrorCode ierr; 1203 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1204 1205 PetscFunctionBegin; 1206 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 /* uncomment for testing purposes */ 1211 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1214 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1215 { 1216 PetscErrorCode ierr; 1217 PC_IS* pcis = (PC_IS*)(pc->data); 1218 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1219 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1220 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1221 MatType impMatType=MATSEQAIJ; 1222 /* one and zero */ 1223 PetscScalar one=1.0,zero=0.0; 1224 /* space to store constraints and their local indices */ 1225 PetscScalar *temp_quadrature_constraint; 1226 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1227 /* iterators */ 1228 PetscInt i,j,k,total_counts,temp_start_ptr; 1229 /* stuff to store connected components stored in pcbddc->mat_graph */ 1230 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1231 PetscInt n_ISForFaces,n_ISForEdges; 1232 /* near null space stuff */ 1233 MatNullSpace nearnullsp; 1234 const Vec *nearnullvecs; 1235 Vec *localnearnullsp; 1236 PetscBool nnsp_has_cnst; 1237 PetscInt nnsp_size; 1238 PetscScalar *array; 1239 /* BLAS integers */ 1240 PetscBLASInt lwork,lierr; 1241 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1242 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1243 /* LAPACK working arrays for SVD or POD */ 1244 PetscBool skip_lapack; 1245 PetscScalar *work; 1246 PetscReal *singular_vals; 1247 #if defined(PETSC_USE_COMPLEX) 1248 PetscReal *rwork; 1249 #endif 1250 #if defined(PETSC_MISSING_LAPACK_GESVD) 1251 PetscBLASInt Blas_one_2=1; 1252 PetscScalar *temp_basis,*correlation_mat; 1253 #else 1254 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1255 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1256 #endif 1257 /* change of basis */ 1258 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1259 PetscBool boolforchange,qr_needed; 1260 PetscBT touched,change_basis,qr_needed_idx; 1261 /* auxiliary stuff */ 1262 PetscInt *nnz,*is_indices,*local_to_B; 1263 /* some quantities */ 1264 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1265 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1266 1267 1268 PetscFunctionBegin; 1269 /* Destroy Mat objects computed previously */ 1270 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1271 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1272 /* Get index sets for faces, edges and vertices from graph */ 1273 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1274 pcbddc->use_vertices = PETSC_TRUE; 1275 } 1276 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1277 /* HACK: provide functions to set change of basis */ 1278 if (!ISForVertices && pcbddc->NullSpace) { 1279 pcbddc->use_change_of_basis = PETSC_TRUE; 1280 pcbddc->use_change_on_faces = PETSC_FALSE; 1281 } 1282 /* print some info */ 1283 if (pcbddc->dbg_flag) { 1284 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1285 i = 0; 1286 if (ISForVertices) { 1287 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1288 } 1289 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1290 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1291 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1292 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1293 } 1294 /* check if near null space is attached to global mat */ 1295 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1296 if (nearnullsp) { 1297 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1298 } else { /* if near null space is not provided BDDC uses constants by default */ 1299 nnsp_size = 0; 1300 nnsp_has_cnst = PETSC_TRUE; 1301 } 1302 /* get max number of constraints on a single cc */ 1303 max_constraints = nnsp_size; 1304 if (nnsp_has_cnst) max_constraints++; 1305 1306 /* 1307 Evaluate maximum storage size needed by the procedure 1308 - temp_indices will contain start index of each constraint stored as follows 1309 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1310 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1311 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1312 */ 1313 total_counts = n_ISForFaces+n_ISForEdges; 1314 total_counts *= max_constraints; 1315 n_vertices = 0; 1316 if (ISForVertices) { 1317 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1318 } 1319 total_counts += n_vertices; 1320 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1321 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1322 total_counts = 0; 1323 max_size_of_constraint = 0; 1324 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1325 if (i<n_ISForEdges) { 1326 used_IS = &ISForEdges[i]; 1327 } else { 1328 used_IS = &ISForFaces[i-n_ISForEdges]; 1329 } 1330 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1331 total_counts += j; 1332 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1333 } 1334 total_counts *= max_constraints; 1335 total_counts += n_vertices; 1336 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1337 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1338 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1339 /* local to boundary numbering */ 1340 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1341 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1342 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1343 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1344 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1345 /* get local part of global near null space vectors */ 1346 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1347 for (k=0;k<nnsp_size;k++) { 1348 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1349 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1350 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1351 } 1352 1353 /* whether or not to skip lapack calls */ 1354 skip_lapack = PETSC_TRUE; 1355 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1356 1357 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1358 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1359 PetscScalar temp_work; 1360 #if defined(PETSC_MISSING_LAPACK_GESVD) 1361 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1362 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1363 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1364 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1365 #if defined(PETSC_USE_COMPLEX) 1366 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1367 #endif 1368 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1369 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1370 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1371 lwork = -1; 1372 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1373 #if !defined(PETSC_USE_COMPLEX) 1374 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1375 #else 1376 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1377 #endif 1378 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1379 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1380 #else /* on missing GESVD */ 1381 /* SVD */ 1382 PetscInt max_n,min_n; 1383 max_n = max_size_of_constraint; 1384 min_n = max_constraints; 1385 if (max_size_of_constraint < max_constraints) { 1386 min_n = max_size_of_constraint; 1387 max_n = max_constraints; 1388 } 1389 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1390 #if defined(PETSC_USE_COMPLEX) 1391 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1392 #endif 1393 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1394 lwork = -1; 1395 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1396 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1397 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1398 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1399 #if !defined(PETSC_USE_COMPLEX) 1400 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1401 #else 1402 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1403 #endif 1404 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1405 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1406 #endif /* on missing GESVD */ 1407 /* Allocate optimal workspace */ 1408 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1409 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1410 } 1411 /* Now we can loop on constraining sets */ 1412 total_counts = 0; 1413 temp_indices[0] = 0; 1414 /* vertices */ 1415 if (ISForVertices) { 1416 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1417 if (nnsp_has_cnst) { /* consider all vertices */ 1418 for (i=0;i<n_vertices;i++) { 1419 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1420 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1421 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1422 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1423 total_counts++; 1424 } 1425 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1426 PetscBool used_vertex; 1427 for (i=0;i<n_vertices;i++) { 1428 used_vertex = PETSC_FALSE; 1429 k = 0; 1430 while (!used_vertex && k<nnsp_size) { 1431 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1432 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1433 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1434 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1435 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1436 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1437 total_counts++; 1438 used_vertex = PETSC_TRUE; 1439 } 1440 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1441 k++; 1442 } 1443 } 1444 } 1445 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1446 n_vertices = total_counts; 1447 } 1448 1449 /* edges and faces */ 1450 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1451 if (i<n_ISForEdges) { 1452 used_IS = &ISForEdges[i]; 1453 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1454 } else { 1455 used_IS = &ISForFaces[i-n_ISForEdges]; 1456 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1457 } 1458 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1459 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1460 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1461 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1462 /* change of basis should not be performed on local periodic nodes */ 1463 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1464 if (nnsp_has_cnst) { 1465 PetscScalar quad_value; 1466 temp_constraints++; 1467 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1468 for (j=0;j<size_of_constraint;j++) { 1469 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1470 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1471 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1472 } 1473 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1474 total_counts++; 1475 } 1476 for (k=0;k<nnsp_size;k++) { 1477 PetscReal real_value; 1478 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1479 for (j=0;j<size_of_constraint;j++) { 1480 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1481 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1482 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1483 } 1484 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1485 /* check if array is null on the connected component */ 1486 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1487 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1488 if (real_value > 0.0) { /* keep indices and values */ 1489 temp_constraints++; 1490 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1491 total_counts++; 1492 } 1493 } 1494 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1495 valid_constraints = temp_constraints; 1496 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1497 if (!pcbddc->use_nnsp_true && temp_constraints) { 1498 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1499 1500 #if defined(PETSC_MISSING_LAPACK_GESVD) 1501 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1502 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1503 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1504 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1505 from that computed using LAPACKgesvd 1506 -> This is due to a different computation of eigenvectors in LAPACKheev 1507 -> The quality of the POD-computed basis will be the same */ 1508 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1509 /* Store upper triangular part of correlation matrix */ 1510 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1511 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1512 for (j=0;j<temp_constraints;j++) { 1513 for (k=0;k<j+1;k++) { 1514 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1515 } 1516 } 1517 /* compute eigenvalues and eigenvectors of correlation matrix */ 1518 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1519 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1520 #if !defined(PETSC_USE_COMPLEX) 1521 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1522 #else 1523 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1524 #endif 1525 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1526 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1527 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1528 j=0; 1529 while (j < temp_constraints && singular_vals[j] < tol) j++; 1530 total_counts=total_counts-j; 1531 valid_constraints = temp_constraints-j; 1532 /* scale and copy POD basis into used quadrature memory */ 1533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1534 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1535 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1537 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1538 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1539 if (j<temp_constraints) { 1540 PetscInt ii; 1541 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1542 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1543 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1544 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1545 for (k=0;k<temp_constraints-j;k++) { 1546 for (ii=0;ii<size_of_constraint;ii++) { 1547 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1548 } 1549 } 1550 } 1551 #else /* on missing GESVD */ 1552 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1553 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1554 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1555 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1556 #if !defined(PETSC_USE_COMPLEX) 1557 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1558 #else 1559 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1560 #endif 1561 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1562 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1563 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1564 k = temp_constraints; 1565 if (k > size_of_constraint) k = size_of_constraint; 1566 j = 0; 1567 while (j < k && singular_vals[k-j-1] < tol) j++; 1568 total_counts = total_counts-temp_constraints+k-j; 1569 valid_constraints = k-j; 1570 #endif /* on missing GESVD */ 1571 } 1572 /* setting change_of_basis flag is safe now */ 1573 if (boolforchange) { 1574 for (j=0;j<valid_constraints;j++) { 1575 PetscBTSet(change_basis,total_counts-j-1); 1576 } 1577 } 1578 } 1579 /* free index sets of faces, edges and vertices */ 1580 for (i=0;i<n_ISForFaces;i++) { 1581 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1582 } 1583 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1584 for (i=0;i<n_ISForEdges;i++) { 1585 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1586 } 1587 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1588 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1589 1590 /* free workspace */ 1591 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1592 ierr = PetscFree(work);CHKERRQ(ierr); 1593 #if defined(PETSC_USE_COMPLEX) 1594 ierr = PetscFree(rwork);CHKERRQ(ierr); 1595 #endif 1596 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1597 #if defined(PETSC_MISSING_LAPACK_GESVD) 1598 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1599 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1600 #endif 1601 } 1602 for (k=0;k<nnsp_size;k++) { 1603 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1604 } 1605 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1606 1607 /* set quantities in pcbddc data structure */ 1608 /* n_vertices defines the number of subdomain corners in the primal space */ 1609 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1610 pcbddc->local_primal_size = total_counts; 1611 pcbddc->n_vertices = n_vertices; 1612 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1613 1614 /* Create constraint matrix */ 1615 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1616 /* so we need to set it up properly either with or without change of basis */ 1617 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1618 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1619 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1620 /* array to compute a local numbering of constraints : vertices first then constraints */ 1621 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1622 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1623 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1624 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1625 /* auxiliary stuff for basis change */ 1626 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1627 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1628 1629 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1630 total_primal_vertices=0; 1631 for (i=0;i<pcbddc->local_primal_size;i++) { 1632 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1633 if (size_of_constraint == 1) { 1634 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1635 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1636 aux_primal_minloc[total_primal_vertices]=0; 1637 total_primal_vertices++; 1638 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1639 PetscInt min_loc,min_index; 1640 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1641 /* find first untouched local node */ 1642 k = 0; 1643 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1644 min_index = global_indices[k]; 1645 min_loc = k; 1646 /* search the minimum among global nodes already untouched on the cc */ 1647 for (k=1;k<size_of_constraint;k++) { 1648 /* there can be more than one constraint on a single connected component */ 1649 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1650 min_index = global_indices[k]; 1651 min_loc = k; 1652 } 1653 } 1654 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1655 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1656 aux_primal_minloc[total_primal_vertices]=min_loc; 1657 total_primal_vertices++; 1658 } 1659 } 1660 /* determine if a QR strategy is needed for change of basis */ 1661 qr_needed = PETSC_FALSE; 1662 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1663 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1664 if (PetscBTLookup(change_basis,i)) { 1665 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1666 j = 0; 1667 for (k=0;k<size_of_constraint;k++) { 1668 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1669 j++; 1670 } 1671 } 1672 /* found more than one primal dof on the cc */ 1673 if (j > 1) { 1674 PetscBTSet(qr_needed_idx,i); 1675 qr_needed = PETSC_TRUE; 1676 } 1677 } 1678 } 1679 /* free workspace */ 1680 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1681 1682 /* permute indices in order to have a sorted set of vertices */ 1683 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1684 1685 /* nonzero structure of constraint matrix */ 1686 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1687 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1688 j=total_primal_vertices; 1689 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1690 if (!PetscBTLookup(change_basis,i)) { 1691 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1692 j++; 1693 } 1694 } 1695 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1696 ierr = PetscFree(nnz);CHKERRQ(ierr); 1697 /* set values in constraint matrix */ 1698 for (i=0;i<total_primal_vertices;i++) { 1699 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1700 } 1701 total_counts = total_primal_vertices; 1702 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1703 if (!PetscBTLookup(change_basis,i)) { 1704 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1705 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1706 total_counts++; 1707 } 1708 } 1709 /* assembling */ 1710 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1711 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1712 /* 1713 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1714 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1715 */ 1716 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1717 if (pcbddc->use_change_of_basis) { 1718 /* dual and primal dofs on a single cc */ 1719 PetscInt dual_dofs,primal_dofs; 1720 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1721 PetscInt primal_counter; 1722 /* working stuff for GEQRF */ 1723 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1724 PetscBLASInt lqr_work; 1725 /* working stuff for UNGQR */ 1726 PetscScalar *gqr_work,lgqr_work_t; 1727 PetscBLASInt lgqr_work; 1728 /* working stuff for TRTRS */ 1729 PetscScalar *trs_rhs; 1730 PetscBLASInt Blas_NRHS; 1731 /* pointers for values insertion into change of basis matrix */ 1732 PetscInt *start_rows,*start_cols; 1733 PetscScalar *start_vals; 1734 /* working stuff for values insertion */ 1735 PetscBT is_primal; 1736 1737 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1738 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1739 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1740 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1741 /* work arrays */ 1742 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1743 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1744 /* nonzeros per row */ 1745 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1746 if (PetscBTLookup(change_basis,i)) { 1747 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1748 if (PetscBTLookup(qr_needed_idx,i)) { 1749 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1750 } else { 1751 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1752 /* get local primal index on the cc */ 1753 j = 0; 1754 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1755 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1756 } 1757 } 1758 } 1759 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1760 ierr = PetscFree(nnz);CHKERRQ(ierr); 1761 /* Set initial identity in the matrix */ 1762 for (i=0;i<pcis->n_B;i++) { 1763 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1764 } 1765 1766 if (pcbddc->dbg_flag) { 1767 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1768 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1769 } 1770 1771 1772 /* Now we loop on the constraints which need a change of basis */ 1773 /* 1774 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1775 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1776 1777 Basic blocks of change of basis matrix T computed 1778 1779 - Using the following block transformation if there is only a primal dof on the cc 1780 (in the example, primal dof is the last one of the edge in LOCAL ordering 1781 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1782 | 1 0 ... 0 1 | 1783 | 0 1 ... 0 1 | 1784 | ... | 1785 | 0 ... 1 1 | 1786 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1787 1788 - via QR decomposition of constraints otherwise 1789 */ 1790 if (qr_needed) { 1791 /* space to store Q */ 1792 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1793 /* first we issue queries for optimal work */ 1794 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1795 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1796 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1797 lqr_work = -1; 1798 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1799 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1800 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1801 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1802 lgqr_work = -1; 1803 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1804 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1805 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1806 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1807 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1808 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1809 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1810 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1811 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1812 /* array to store scaling factors for reflectors */ 1813 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1814 /* array to store rhs and solution of triangular solver */ 1815 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1816 /* allocating workspace for check */ 1817 if (pcbddc->dbg_flag) { 1818 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1819 } 1820 } 1821 /* array to store whether a node is primal or not */ 1822 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1823 for (i=0;i<total_primal_vertices;i++) { 1824 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1825 } 1826 1827 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1828 /* -> using implicit ordering contained in temp_indices data */ 1829 total_counts = pcbddc->n_vertices; 1830 primal_counter = total_counts; 1831 while (total_counts<pcbddc->local_primal_size) { 1832 primal_dofs = 1; 1833 if (PetscBTLookup(change_basis,total_counts)) { 1834 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1835 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1836 primal_dofs++; 1837 } 1838 /* get constraint info */ 1839 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1840 dual_dofs = size_of_constraint-primal_dofs; 1841 1842 if (pcbddc->dbg_flag) { 1843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1844 } 1845 1846 if (primal_dofs > 1) { /* QR */ 1847 1848 /* copy quadrature constraints for change of basis check */ 1849 if (pcbddc->dbg_flag) { 1850 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1851 } 1852 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1853 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1854 1855 /* compute QR decomposition of constraints */ 1856 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1857 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1858 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1859 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1860 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1861 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1862 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1863 1864 /* explictly compute R^-T */ 1865 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1866 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1867 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1868 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1869 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1870 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1871 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1872 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1873 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1874 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1875 1876 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1877 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1878 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1879 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1880 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1881 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1882 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1883 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1884 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1885 1886 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1887 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1888 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1889 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1890 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1891 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1892 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1893 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1894 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1895 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1896 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1897 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1898 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1899 1900 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1901 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1902 /* insert cols for primal dofs */ 1903 for (j=0;j<primal_dofs;j++) { 1904 start_vals = &qr_basis[j*size_of_constraint]; 1905 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1906 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1907 } 1908 /* insert cols for dual dofs */ 1909 for (j=0,k=0;j<dual_dofs;k++) { 1910 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1911 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1912 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1913 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1914 j++; 1915 } 1916 } 1917 1918 /* check change of basis */ 1919 if (pcbddc->dbg_flag) { 1920 PetscInt ii,jj; 1921 PetscBool valid_qr=PETSC_TRUE; 1922 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1923 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1924 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1925 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1926 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1927 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1928 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1929 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1930 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1931 for (jj=0;jj<size_of_constraint;jj++) { 1932 for (ii=0;ii<primal_dofs;ii++) { 1933 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1934 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1935 } 1936 } 1937 if (!valid_qr) { 1938 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1939 for (jj=0;jj<size_of_constraint;jj++) { 1940 for (ii=0;ii<primal_dofs;ii++) { 1941 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1942 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1943 } 1944 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1945 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1946 } 1947 } 1948 } 1949 } else { 1950 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1951 } 1952 } 1953 } else { /* simple transformation block */ 1954 PetscInt row,col; 1955 PetscScalar val; 1956 for (j=0;j<size_of_constraint;j++) { 1957 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1958 if (!PetscBTLookup(is_primal,row)) { 1959 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1960 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1961 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1962 } else { 1963 for (k=0;k<size_of_constraint;k++) { 1964 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1965 if (row != col) { 1966 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1967 } else { 1968 val = 1.0; 1969 } 1970 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1971 } 1972 } 1973 } 1974 } 1975 /* increment primal counter */ 1976 primal_counter += primal_dofs; 1977 } else { 1978 if (pcbddc->dbg_flag) { 1979 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1980 } 1981 } 1982 /* increment constraint counter total_counts */ 1983 total_counts += primal_dofs; 1984 } 1985 1986 /* free workspace */ 1987 if (qr_needed) { 1988 if (pcbddc->dbg_flag) { 1989 ierr = PetscFree(work);CHKERRQ(ierr); 1990 } 1991 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1992 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1993 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1994 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1995 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1996 } 1997 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1998 /* assembling */ 1999 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2000 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2001 /* 2002 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2003 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2004 */ 2005 } 2006 2007 /* flush dbg viewer */ 2008 if (pcbddc->dbg_flag) { 2009 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2010 } 2011 2012 /* free workspace */ 2013 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2014 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2015 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2016 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2017 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2018 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2019 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2020 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2021 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2022 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2023 PetscFunctionReturn(0); 2024 } 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2028 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2029 { 2030 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2031 PC_IS *pcis = (PC_IS*)pc->data; 2032 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2033 PetscInt bs,ierr,i,vertex_size; 2034 PetscViewer viewer=pcbddc->dbg_viewer; 2035 2036 PetscFunctionBegin; 2037 /* Reset previously computed graph */ 2038 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2039 /* Init local Graph struct */ 2040 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2041 2042 /* Check validity of the csr graph passed in by the user */ 2043 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2044 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2045 } 2046 2047 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2048 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2049 Mat mat_adj; 2050 const PetscInt *xadj,*adjncy; 2051 PetscBool flg_row=PETSC_TRUE; 2052 2053 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2054 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2055 if (!flg_row) { 2056 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2057 } 2058 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2059 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2060 if (!flg_row) { 2061 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2062 } 2063 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2064 } 2065 2066 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2067 vertex_size = 1; 2068 if (!pcbddc->user_provided_isfordofs) { 2069 if (!pcbddc->n_ISForDofs) { 2070 IS *custom_ISForDofs; 2071 2072 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2073 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2074 for (i=0;i<bs;i++) { 2075 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2076 } 2077 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2078 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2079 /* remove my references to IS objects */ 2080 for (i=0;i<bs;i++) { 2081 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2082 } 2083 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2084 } 2085 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2086 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2087 } 2088 2089 /* Setup of Graph */ 2090 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2091 2092 /* Graph's connected components analysis */ 2093 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2094 2095 /* print some info to stdout */ 2096 if (pcbddc->dbg_flag) { 2097 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2098 } 2099 PetscFunctionReturn(0); 2100 } 2101 2102 #undef __FUNCT__ 2103 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2104 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2105 { 2106 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2107 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2108 PetscErrorCode ierr; 2109 2110 PetscFunctionBegin; 2111 n = 0; 2112 vertices = 0; 2113 if (pcbddc->ConstraintMatrix) { 2114 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2115 for (i=0;i<local_primal_size;i++) { 2116 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2117 if (size_of_constraint == 1) n++; 2118 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2119 } 2120 if (vertices_idx) { 2121 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2122 n = 0; 2123 for (i=0;i<local_primal_size;i++) { 2124 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2125 if (size_of_constraint == 1) { 2126 vertices[n++]=row_cmat_indices[0]; 2127 } 2128 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2129 } 2130 } 2131 } 2132 *n_vertices = n; 2133 if (vertices_idx) *vertices_idx = vertices; 2134 PetscFunctionReturn(0); 2135 } 2136 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2139 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2140 { 2141 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2142 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2143 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2144 PetscBT touched; 2145 PetscErrorCode ierr; 2146 2147 /* This function assumes that the number of local constraints per connected component 2148 is not greater than the number of nodes defined for the connected component 2149 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2150 PetscFunctionBegin; 2151 n = 0; 2152 constraints_index = 0; 2153 if (pcbddc->ConstraintMatrix) { 2154 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2155 max_size_of_constraint = 0; 2156 for (i=0;i<local_primal_size;i++) { 2157 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2158 if (size_of_constraint > 1) { 2159 n++; 2160 } 2161 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2162 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2163 } 2164 if (constraints_idx) { 2165 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2166 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2167 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2168 n = 0; 2169 for (i=0;i<local_primal_size;i++) { 2170 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2171 if (size_of_constraint > 1) { 2172 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2173 /* find first untouched local node */ 2174 j = 0; 2175 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2176 min_index = row_cmat_global_indices[j]; 2177 min_loc = j; 2178 /* search the minimum among nodes not yet touched on the connected component 2179 since there can be more than one constraint on a single cc */ 2180 for (j=1;j<size_of_constraint;j++) { 2181 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2182 min_index = row_cmat_global_indices[j]; 2183 min_loc = j; 2184 } 2185 } 2186 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2187 constraints_index[n++] = row_cmat_indices[min_loc]; 2188 } 2189 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2190 } 2191 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2192 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2193 } 2194 } 2195 *n_constraints = n; 2196 if (constraints_idx) *constraints_idx = constraints_index; 2197 PetscFunctionReturn(0); 2198 } 2199 2200 /* the next two functions has been adapted from pcis.c */ 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "PCBDDCApplySchur" 2203 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2204 { 2205 PetscErrorCode ierr; 2206 PC_IS *pcis = (PC_IS*)(pc->data); 2207 2208 PetscFunctionBegin; 2209 if (!vec2_B) { vec2_B = v; } 2210 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2211 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2212 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2213 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2214 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2215 PetscFunctionReturn(0); 2216 } 2217 2218 #undef __FUNCT__ 2219 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2220 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2221 { 2222 PetscErrorCode ierr; 2223 PC_IS *pcis = (PC_IS*)(pc->data); 2224 2225 PetscFunctionBegin; 2226 if (!vec2_B) { vec2_B = v; } 2227 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2228 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2229 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2230 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2231 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2232 PetscFunctionReturn(0); 2233 } 2234 2235 #undef __FUNCT__ 2236 #define __FUNCT__ "PCBDDCSubsetNumbering" 2237 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2238 { 2239 Vec local_vec,global_vec; 2240 IS seqis,paris; 2241 VecScatter scatter_ctx; 2242 PetscScalar *array; 2243 PetscInt *temp_global_dofs; 2244 PetscScalar globalsum; 2245 PetscInt i,j,s; 2246 PetscInt nlocals,first_index,old_index,max_local; 2247 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2248 PetscMPIInt *dof_sizes,*dof_displs; 2249 PetscBool first_found; 2250 PetscErrorCode ierr; 2251 2252 PetscFunctionBegin; 2253 /* mpi buffers */ 2254 MPI_Comm_size(comm,&size_prec_comm); 2255 MPI_Comm_rank(comm,&rank_prec_comm); 2256 j = ( !rank_prec_comm ? size_prec_comm : 0); 2257 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2258 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2259 /* get maximum size of subset */ 2260 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2261 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2262 max_local = 0; 2263 if (n_local_dofs) { 2264 max_local = temp_global_dofs[0]; 2265 for (i=1;i<n_local_dofs;i++) { 2266 if (max_local < temp_global_dofs[i] ) { 2267 max_local = temp_global_dofs[i]; 2268 } 2269 } 2270 } 2271 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2272 max_global++; 2273 max_local = 0; 2274 if (n_local_dofs) { 2275 max_local = local_dofs[0]; 2276 for (i=1;i<n_local_dofs;i++) { 2277 if (max_local < local_dofs[i] ) { 2278 max_local = local_dofs[i]; 2279 } 2280 } 2281 } 2282 max_local++; 2283 /* allocate workspace */ 2284 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2285 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2286 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2287 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2288 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2289 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2290 /* create scatter */ 2291 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2292 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2293 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2294 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2295 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2296 /* init array */ 2297 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2298 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2299 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2300 if (local_dofs_mult) { 2301 for (i=0;i<n_local_dofs;i++) { 2302 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2303 } 2304 } else { 2305 for (i=0;i<n_local_dofs;i++) { 2306 array[local_dofs[i]]=1.0; 2307 } 2308 } 2309 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2310 /* scatter into global vec and get total number of global dofs */ 2311 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2312 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2313 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2314 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2315 /* Fill global_vec with cumulative function for global numbering */ 2316 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2317 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2318 nlocals = 0; 2319 first_index = -1; 2320 first_found = PETSC_FALSE; 2321 for (i=0;i<s;i++) { 2322 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2323 first_found = PETSC_TRUE; 2324 first_index = i; 2325 } 2326 nlocals += (PetscInt)PetscRealPart(array[i]); 2327 } 2328 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2329 if (!rank_prec_comm) { 2330 dof_displs[0]=0; 2331 for (i=1;i<size_prec_comm;i++) { 2332 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2333 } 2334 } 2335 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2336 if (first_found) { 2337 array[first_index] += (PetscScalar)nlocals; 2338 old_index = first_index; 2339 for (i=first_index+1;i<s;i++) { 2340 if (PetscRealPart(array[i]) > 0.0) { 2341 array[i] += array[old_index]; 2342 old_index = i; 2343 } 2344 } 2345 } 2346 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2347 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2348 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2349 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2350 /* get global ordering of local dofs */ 2351 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2352 if (local_dofs_mult) { 2353 for (i=0;i<n_local_dofs;i++) { 2354 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2355 } 2356 } else { 2357 for (i=0;i<n_local_dofs;i++) { 2358 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2359 } 2360 } 2361 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2362 /* free workspace */ 2363 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2364 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2365 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2366 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2367 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2368 /* return pointer to global ordering of local dofs */ 2369 *global_numbering_subset = temp_global_dofs; 2370 PetscFunctionReturn(0); 2371 } 2372 2373 #undef __FUNCT__ 2374 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2375 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2376 { 2377 PetscInt i,j; 2378 PetscScalar *alphas; 2379 PetscErrorCode ierr; 2380 2381 PetscFunctionBegin; 2382 /* this implements stabilized Gram-Schmidt */ 2383 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2384 for (i=0;i<n;i++) { 2385 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2386 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2387 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2388 } 2389 ierr = PetscFree(alphas);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatConvert_IS_AIJ" 2396 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2397 { 2398 Mat new_mat; 2399 Mat_IS *matis = (Mat_IS*)(mat->data); 2400 /* info on mat */ 2401 PetscInt bs,rows,cols; 2402 PetscInt lrows,lcols; 2403 PetscInt local_rows,local_cols; 2404 PetscMPIInt nsubdomains; 2405 /* preallocation */ 2406 Vec vec_dnz,vec_onz; 2407 PetscScalar *my_dnz,*my_onz,*array; 2408 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2409 PetscInt index_row,index_col,owner; 2410 PetscInt *local_indices,*global_indices; 2411 /* work */ 2412 PetscInt i,j; 2413 PetscErrorCode ierr; 2414 2415 PetscFunctionBegin; 2416 /* CHECKS */ 2417 /* get info from mat */ 2418 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2419 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2420 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2421 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2422 2423 /* MAT_INITIAL_MATRIX */ 2424 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2425 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2426 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2427 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2428 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2429 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2430 2431 /* 2432 preallocation 2433 */ 2434 2435 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2436 /* 2437 Some vectors are needed to sum up properly on shared interface dofs. 2438 Note that preallocation is not exact, since it overestimates nonzeros 2439 */ 2440 /* 2441 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2442 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2443 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2444 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2445 */ 2446 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2447 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2448 /* All processes need to compute entire row ownership */ 2449 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2450 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2451 for (i=0;i<nsubdomains;i++) { 2452 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2453 row_ownership[j]=i; 2454 } 2455 } 2456 /* map indices of local mat to global values */ 2457 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2458 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2459 for (i=0;i<local_rows;i++) local_indices[i]=i; 2460 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2461 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2462 2463 /* 2464 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2465 then, they will be summed up properly. This way, preallocation is always sufficient 2466 */ 2467 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2468 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2469 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2470 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2471 for (i=0;i<local_rows;i++) { 2472 index_row = global_indices[i]; 2473 for (j=i;j<local_rows;j++) { 2474 owner = row_ownership[index_row]; 2475 index_col = global_indices[j]; 2476 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2477 my_dnz[i] += 1.0; 2478 } else { /* offdiag block */ 2479 my_onz[i] += 1.0; 2480 } 2481 /* same as before, interchanging rows and cols */ 2482 if (i != j) { 2483 owner = row_ownership[index_col]; 2484 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2485 my_dnz[j] += 1.0; 2486 } else { 2487 my_onz[j] += 1.0; 2488 } 2489 } 2490 } 2491 } 2492 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2493 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2494 if (local_rows) { /* multilevel guard */ 2495 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2496 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2497 } 2498 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2499 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2500 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2501 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2502 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2503 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2504 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2505 2506 /* set computed preallocation in dnz and onz */ 2507 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2508 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2509 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2510 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2511 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2512 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2513 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2514 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2515 2516 /* Resize preallocation if overestimated */ 2517 for (i=0;i<lrows;i++) { 2518 dnz[i] = PetscMin(dnz[i],lcols); 2519 onz[i] = PetscMin(onz[i],cols-lcols); 2520 } 2521 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2522 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2523 2524 /* 2525 Set values. Very Basic. 2526 */ 2527 for (i=0;i<local_rows;i++) { 2528 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2529 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2530 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2531 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2532 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2533 } 2534 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2535 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2536 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2537 2538 /* get back mat */ 2539 *M = new_mat; 2540 PetscFunctionReturn(0); 2541 } 2542 2543 #undef __FUNCT__ 2544 #define __FUNCT__ "MatISSubassemble_Private" 2545 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2546 { 2547 Mat subdomain_adj; 2548 IS new_ranks,ranks_send_to; 2549 MatPartitioning partitioner; 2550 Mat_IS *matis; 2551 PetscInt n_neighs,*neighs,*n_shared,**shared; 2552 PetscInt prank; 2553 PetscMPIInt size,rank,color; 2554 PetscInt *xadj,*adjncy,*oldranks; 2555 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2556 PetscInt i,j,n_subdomains,local_size,threshold=0; 2557 PetscErrorCode ierr; 2558 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2559 PetscSubcomm subcomm; 2560 2561 PetscFunctionBegin; 2562 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2563 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2564 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2565 2566 /* Get info on mapping */ 2567 matis = (Mat_IS*)(mat->data); 2568 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2569 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2570 2571 /* build local CSR graph of subdomains' connectivity */ 2572 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2573 xadj[0] = 0; 2574 xadj[1] = PetscMax(n_neighs-1,0); 2575 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2576 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2577 2578 if (threshold) { 2579 PetscInt* count,min_threshold; 2580 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2581 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2582 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2583 for (j=0;j<n_shared[i];j++) { 2584 count[shared[i][j]] += 1; 2585 } 2586 } 2587 /* adapt threshold since we dont want to lose significant connections */ 2588 min_threshold = n_neighs; 2589 for (i=1;i<n_neighs;i++) { 2590 for (j=0;j<n_shared[i];j++) { 2591 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2592 } 2593 } 2594 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2595 threshold = PetscMax(min_threshold+1,threshold); 2596 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2597 xadj[1] = 0; 2598 for (i=1;i<n_neighs;i++) { 2599 for (j=0;j<n_shared[i];j++) { 2600 if (count[shared[i][j]] < threshold) { 2601 adjncy[xadj[1]] = neighs[i]; 2602 adjncy_wgt[xadj[1]] = n_shared[i]; 2603 xadj[1]++; 2604 break; 2605 } 2606 } 2607 } 2608 ierr = PetscFree(count);CHKERRQ(ierr); 2609 } else { 2610 if (xadj[1]) { 2611 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2612 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2613 } 2614 } 2615 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2616 if (use_square) { 2617 for (i=0;i<xadj[1];i++) { 2618 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2619 } 2620 } 2621 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2622 2623 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2624 2625 /* 2626 Restrict work on active processes only. 2627 */ 2628 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2629 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2630 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2631 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2632 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2633 if (color) { 2634 ierr = PetscFree(xadj);CHKERRQ(ierr); 2635 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2636 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2637 } else { 2638 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2639 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2640 prank = rank; 2641 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2642 for (i=0;i<size;i++) { 2643 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2644 } 2645 for (i=0;i<xadj[1];i++) { 2646 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2647 } 2648 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2649 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2650 n_subdomains = (PetscInt)size; 2651 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2652 2653 /* Partition */ 2654 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2655 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2656 if (use_vwgt) { 2657 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2658 v_wgt[0] = local_size; 2659 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2660 } 2661 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2662 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2663 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2664 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2665 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2666 2667 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2668 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2669 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2670 /* clean up */ 2671 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2672 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2673 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2674 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2675 } 2676 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2677 2678 /* assemble parallel IS for sends */ 2679 i = 1; 2680 if (color) i=0; 2681 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2682 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2683 2684 /* get back IS */ 2685 *is_sends = ranks_send_to; 2686 PetscFunctionReturn(0); 2687 } 2688 2689 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2690 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatISSubassemble" 2693 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2694 { 2695 Mat local_mat,new_mat; 2696 Mat_IS *matis; 2697 IS is_sends_internal; 2698 PetscInt rows,cols; 2699 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2700 PetscBool ismatis,isdense; 2701 ISLocalToGlobalMapping l2gmap; 2702 PetscInt* l2gmap_indices; 2703 const PetscInt* is_indices; 2704 MatType new_local_type; 2705 MatTypePrivate new_local_type_private; 2706 /* buffers */ 2707 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2708 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2709 /* MPI */ 2710 MPI_Comm comm; 2711 PetscMPIInt n_sends,n_recvs,commsize; 2712 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2713 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2714 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2715 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2716 PetscErrorCode ierr; 2717 2718 PetscFunctionBegin; 2719 /* checks */ 2720 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2721 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2722 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2723 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2724 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2725 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2726 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2727 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2728 PetscValidLogicalCollectiveInt(mat,bs,0); 2729 /* prepare IS for sending if not provided */ 2730 if (!is_sends) { 2731 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2732 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2733 } else { 2734 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2735 is_sends_internal = is_sends; 2736 } 2737 2738 /* get pointer of MATIS data */ 2739 matis = (Mat_IS*)mat->data; 2740 2741 /* get comm */ 2742 comm = PetscObjectComm((PetscObject)mat); 2743 2744 /* compute number of sends */ 2745 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2746 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2747 2748 /* compute number of receives */ 2749 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2750 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2751 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2752 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2753 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2754 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2755 ierr = PetscFree(iflags);CHKERRQ(ierr); 2756 2757 /* prepare send/receive buffers */ 2758 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2759 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2760 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2761 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2762 2763 /* Get data from local mat */ 2764 if (!isdense) { 2765 /* TODO: See below some guidelines on how to prepare the local buffers */ 2766 /* 2767 send_buffer_vals should contain the raw values of the local matrix 2768 send_buffer_idxs should contain: 2769 - MatType_PRIVATE type 2770 - PetscInt size_of_l2gmap 2771 - PetscInt global_row_indices[size_of_l2gmap] 2772 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2773 */ 2774 } else { 2775 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2776 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2777 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2778 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2779 send_buffer_idxs[1] = i; 2780 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2781 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2782 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2783 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2784 for (i=0;i<n_sends;i++) { 2785 ilengths_vals[is_indices[i]] = len*len; 2786 ilengths_idxs[is_indices[i]] = len+2; 2787 } 2788 } 2789 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2790 buf_size_idxs = 0; 2791 buf_size_vals = 0; 2792 for (i=0;i<n_recvs;i++) { 2793 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2794 buf_size_vals += (PetscInt)olengths_vals[i]; 2795 } 2796 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2797 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2798 2799 /* get new tags for clean communications */ 2800 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2801 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2802 2803 /* allocate for requests */ 2804 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2805 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2806 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2807 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2808 2809 /* communications */ 2810 ptr_idxs = recv_buffer_idxs; 2811 ptr_vals = recv_buffer_vals; 2812 for (i=0;i<n_recvs;i++) { 2813 source_dest = onodes[i]; 2814 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2815 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2816 ptr_idxs += olengths_idxs[i]; 2817 ptr_vals += olengths_vals[i]; 2818 } 2819 for (i=0;i<n_sends;i++) { 2820 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2821 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2822 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2823 } 2824 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2825 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2826 2827 /* assemble new l2g map */ 2828 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2829 ptr_idxs = recv_buffer_idxs; 2830 buf_size_idxs = 0; 2831 for (i=0;i<n_recvs;i++) { 2832 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2833 ptr_idxs += olengths_idxs[i]; 2834 } 2835 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2836 ptr_idxs = recv_buffer_idxs; 2837 buf_size_idxs = 0; 2838 for (i=0;i<n_recvs;i++) { 2839 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2840 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2841 ptr_idxs += olengths_idxs[i]; 2842 } 2843 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2844 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2845 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2846 2847 /* infer new local matrix type from received local matrices type */ 2848 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2849 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2850 new_local_type_private = MATAIJ_PRIVATE; 2851 new_local_type = MATSEQAIJ; 2852 if (n_recvs) { 2853 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2854 ptr_idxs = recv_buffer_idxs; 2855 for (i=0;i<n_recvs;i++) { 2856 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2857 new_local_type_private = MATAIJ_PRIVATE; 2858 break; 2859 } 2860 ptr_idxs += olengths_idxs[i]; 2861 } 2862 switch (new_local_type_private) { 2863 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2864 new_local_type = MATSEQAIJ; 2865 bs = 1; 2866 break; 2867 case MATAIJ_PRIVATE: 2868 new_local_type = MATSEQAIJ; 2869 bs = 1; 2870 break; 2871 case MATBAIJ_PRIVATE: 2872 new_local_type = MATSEQBAIJ; 2873 break; 2874 case MATSBAIJ_PRIVATE: 2875 new_local_type = MATSEQSBAIJ; 2876 break; 2877 default: 2878 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2879 break; 2880 } 2881 } 2882 2883 /* create MATIS object */ 2884 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2885 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2886 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2887 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2888 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2889 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2890 2891 /* set values */ 2892 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2893 ptr_vals = recv_buffer_vals; 2894 ptr_idxs = recv_buffer_idxs; 2895 for (i=0;i<n_recvs;i++) { 2896 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2897 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2898 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2899 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2900 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2901 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2902 } 2903 ptr_idxs += olengths_idxs[i]; 2904 ptr_vals += olengths_vals[i]; 2905 } 2906 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2907 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2908 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2909 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2910 2911 { /* check */ 2912 Vec lvec,rvec; 2913 PetscReal infty_error; 2914 2915 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2916 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2917 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2918 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2919 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2920 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2921 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2922 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2923 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2924 } 2925 2926 /* free workspace */ 2927 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2928 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2929 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2930 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2931 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2932 if (isdense) { 2933 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2934 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2935 } else { 2936 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2937 } 2938 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2939 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2940 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2941 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2942 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2943 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2944 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2945 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2946 ierr = PetscFree(onodes);CHKERRQ(ierr); 2947 /* get back new mat */ 2948 *mat_n = new_mat; 2949 PetscFunctionReturn(0); 2950 } 2951 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2954 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2955 { 2956 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2957 PC_IS *pcis = (PC_IS*)pc->data; 2958 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2959 MatNullSpace CoarseNullSpace=NULL; 2960 ISLocalToGlobalMapping coarse_islg; 2961 IS coarse_is; 2962 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 2963 PetscInt im_active=-1,active_procs=-1; 2964 PC pc_temp; 2965 PCType coarse_pc_type; 2966 KSPType coarse_ksp_type; 2967 PetscBool multilevel_requested,multilevel_allowed; 2968 PetscBool setsym,issym,isbddc,isnn; 2969 MatStructure matstruct; 2970 PetscErrorCode ierr; 2971 2972 PetscFunctionBegin; 2973 /* Assign global numbering to coarse dofs */ 2974 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 2975 2976 /* infer some info from user */ 2977 issym = PETSC_FALSE; 2978 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 2979 multilevel_allowed = PETSC_FALSE; 2980 multilevel_requested = PETSC_FALSE; 2981 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 2982 if (multilevel_requested) { 2983 /* count "active processes" */ 2984 im_active = !!(pcis->n); 2985 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2986 if (active_procs/pcbddc->coarsening_ratio < 2) { 2987 multilevel_allowed = PETSC_FALSE; 2988 } else { 2989 multilevel_allowed = PETSC_TRUE; 2990 } 2991 } 2992 2993 /* set defaults for coarse KSP and PC */ 2994 if (multilevel_allowed) { 2995 if (issym) { 2996 coarse_ksp_type = KSPRICHARDSON; 2997 } else { 2998 coarse_ksp_type = KSPCHEBYSHEV; 2999 } 3000 coarse_pc_type = PCBDDC; 3001 } else { 3002 coarse_ksp_type = KSPPREONLY; 3003 coarse_pc_type = PCREDUNDANT; 3004 } 3005 3006 /* create the coarse KSP object only once with defaults */ 3007 if (!pcbddc->coarse_ksp) { 3008 char prefix[256],str_level[3]; 3009 size_t len; 3010 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3011 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3012 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3013 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3014 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3015 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3016 /* prefix */ 3017 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3018 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3019 if (!pcbddc->current_level) { 3020 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3021 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3022 } else { 3023 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3024 if (pcbddc->current_level>1) len -= 2; 3025 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3026 *(prefix+len)='\0'; 3027 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3028 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3029 } 3030 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3031 } 3032 /* allow user customization */ 3033 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3034 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3035 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3036 3037 /* get some info after set from options */ 3038 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3039 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3040 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3041 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3042 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3043 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3044 isbddc = PETSC_FALSE; 3045 } 3046 3047 /* propagate BDDC info to the next level */ 3048 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3049 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3050 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3051 3052 /* creates temporary MATIS object for coarse matrix */ 3053 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3054 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3055 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3056 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3057 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3058 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3059 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3061 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3062 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3063 3064 /* assemble coarse matrix */ 3065 if (isbddc || isnn) { 3066 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3067 } else { 3068 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3069 } 3070 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3071 3072 /* create local to global scatters for coarse problem */ 3073 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3074 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3075 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3076 3077 /* propagate symmetry info to coarse matrix */ 3078 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3079 3080 /* Compute coarse null space (special handling by BDDC only) */ 3081 if (pcbddc->NullSpace) { 3082 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3083 if (isbddc) { 3084 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3085 } else { 3086 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3087 } 3088 } 3089 3090 /* set operators */ 3091 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3092 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3093 3094 /* additional KSP customization */ 3095 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3096 if (max_it < 5) { 3097 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3098 } 3099 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3100 3101 3102 /* print some info if requested */ 3103 if (pcbddc->dbg_flag) { 3104 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3105 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3106 if (!multilevel_allowed) { 3107 if (multilevel_requested) { 3108 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3109 } else if (pcbddc->max_levels) { 3110 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3111 } 3112 } 3113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3114 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3115 } 3116 3117 /* setup coarse ksp */ 3118 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3119 if (pcbddc->dbg_flag) { 3120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3121 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3122 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3123 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3124 } 3125 3126 /* Check coarse problem if requested */ 3127 if (pcbddc->dbg_flag) { 3128 KSP check_ksp; 3129 KSPType check_ksp_type; 3130 PC check_pc; 3131 Vec check_vec; 3132 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3133 PetscInt its; 3134 PetscBool ispreonly,compute; 3135 3136 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3137 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3138 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3139 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3140 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3141 if (ispreonly) { 3142 check_ksp_type = KSPPREONLY; 3143 compute = PETSC_FALSE; 3144 } else { 3145 if (issym) check_ksp_type = KSPCG; 3146 else check_ksp_type = KSPGMRES; 3147 compute = PETSC_TRUE; 3148 } 3149 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3150 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3151 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3152 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3153 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3154 /* create random vec */ 3155 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3156 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3157 if (CoarseNullSpace) { 3158 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3159 } 3160 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3161 /* solve coarse problem */ 3162 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3163 if (CoarseNullSpace) { 3164 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3165 } 3166 /* check coarse problem residual error */ 3167 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3168 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3169 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3170 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3171 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3172 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3173 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3174 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3175 /* get eigenvalue estimation if preonly has not been requested */ 3176 if (!ispreonly) { 3177 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3178 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3179 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3180 } 3181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3182 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3183 } 3184 /* free memory */ 3185 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3186 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3187 PetscFunctionReturn(0); 3188 } 3189 3190 #undef __FUNCT__ 3191 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3192 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3193 { 3194 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3195 PC_IS* pcis = (PC_IS*)pc->data; 3196 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3197 PetscInt i,j,coarse_size; 3198 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3199 PetscErrorCode ierr; 3200 3201 PetscFunctionBegin; 3202 /* get indices in local ordering for vertices and constraints */ 3203 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3204 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3205 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3206 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3207 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3208 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3209 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3210 3211 /* Compute global number of coarse dofs */ 3212 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3213 3214 /* check numbering */ 3215 if (pcbddc->dbg_flag) { 3216 PetscScalar coarsesum,*array; 3217 3218 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3219 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3220 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3221 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3222 for (i=0;i<pcbddc->local_primal_size;i++) { 3223 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3224 } 3225 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3226 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3227 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3228 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3229 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3230 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3231 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3232 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3233 for (i=0;i<pcis->n;i++) { 3234 if (array[i] == 1.0) { 3235 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3236 } 3237 } 3238 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3239 for (i=0;i<pcis->n;i++) { 3240 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3241 } 3242 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3243 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3244 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3245 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3246 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3247 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3248 if (pcbddc->dbg_flag > 1) { 3249 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3250 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3251 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3252 for (i=0;i<pcbddc->local_primal_size;i++) { 3253 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3254 } 3255 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3256 } 3257 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3258 } 3259 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3260 /* get back data */ 3261 *coarse_size_n = coarse_size; 3262 *local_primal_indices_n = local_primal_indices; 3263 PetscFunctionReturn(0); 3264 } 3265 3266