1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 104 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 117 { 118 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 119 PC_IS *pcis = (PC_IS*)pc->data; 120 VecType impVecType; 121 PetscInt n_constraints,n_R,old_size; 122 PetscErrorCode ierr; 123 124 PetscFunctionBegin; 125 if (!pcbddc->ConstraintMatrix) { 126 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 127 } 128 /* get sizes */ 129 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 130 n_R = pcis->n-pcbddc->n_actual_vertices; 131 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 132 /* local work vectors (try to avoid unneeded work)*/ 133 /* R nodes */ 134 old_size = -1; 135 if (pcbddc->vec1_R) { 136 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 137 } 138 if (n_R != old_size) { 139 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 140 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 141 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 142 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 143 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 144 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 145 } 146 /* local primal dofs */ 147 old_size = -1; 148 if (pcbddc->vec1_P) { 149 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 150 } 151 if (pcbddc->local_primal_size != old_size) { 152 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 153 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 155 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 156 } 157 /* local explicit constraints */ 158 old_size = -1; 159 if (pcbddc->vec1_C) { 160 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 161 } 162 if (n_constraints && n_constraints != old_size) { 163 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 164 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 166 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNCT__ 172 #define __FUNCT__ "PCBDDCSetUpCorrection" 173 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 174 { 175 PetscErrorCode ierr; 176 /* pointers to pcis and pcbddc */ 177 PC_IS* pcis = (PC_IS*)pc->data; 178 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 179 /* submatrices of local problem */ 180 Mat A_RV,A_VR,A_VV; 181 /* working matrices */ 182 Mat M1,M2,M3,C_CR; 183 /* working vectors */ 184 Vec vec1_C,vec2_C,vec1_V,vec2_V; 185 /* additional working stuff */ 186 IS is_aux; 187 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 188 const PetscScalar *array,*row_cmat_values; 189 const PetscInt *row_cmat_indices,*idx_R_local; 190 PetscInt *idx_V_B,*auxindices; 191 PetscInt n_vertices,n_constraints,size_of_constraint; 192 PetscInt i,j,n_R,n_D,n_B; 193 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 194 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 195 MatType impMatType; 196 /* some shortcuts to scalars */ 197 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 198 /* for debugging purposes */ 199 PetscReal *coarsefunctions_errors,*constraints_errors; 200 201 PetscFunctionBegin; 202 /* get number of vertices (corners plus constraints with change of basis) 203 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 204 n_vertices = pcbddc->n_actual_vertices; 205 n_constraints = pcbddc->local_primal_size-n_vertices; 206 /* Set Non-overlapping dimensions */ 207 n_B = pcis->n_B; n_D = pcis->n - n_B; 208 n_R = pcis->n-n_vertices; 209 210 /* Set types for local objects needed by BDDC precondtioner */ 211 impMatType = MATSEQDENSE; 212 213 /* Allocating some extra storage just to be safe */ 214 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 215 for (i=0;i<pcis->n;i++) auxindices[i]=i; 216 217 /* vertices in boundary numbering */ 218 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 219 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 220 if (i != n_vertices) { 221 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 222 } 223 224 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 225 if (n_constraints) { 226 /* see if we can save some allocations */ 227 if (pcbddc->local_auxmat2) { 228 PetscInt on_R,on_constraints; 229 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 230 if (on_R != n_R || on_constraints != n_constraints) { 231 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 232 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 233 } 234 } 235 /* work vectors */ 236 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 238 /* auxiliary matrices */ 239 if (!pcbddc->local_auxmat2) { 240 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 241 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 242 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 243 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 244 } 245 246 /* Extract constraints on R nodes: C_{CR} */ 247 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 248 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 249 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 250 251 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 252 for (i=0;i<n_constraints;i++) { 253 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 254 /* Get row of constraint matrix in R numbering */ 255 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 256 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 257 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 259 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 260 /* Solve for row of constraint matrix in R numbering */ 261 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 262 /* Set values in local_auxmat2 */ 263 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 264 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 265 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 } 267 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 268 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 270 271 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 272 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 273 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 274 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 275 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 276 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 277 ierr = MatSetUp(M1);CHKERRQ(ierr); 278 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 279 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 280 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 281 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 282 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 283 ierr = MatDestroy(&M2);CHKERRQ(ierr); 284 ierr = MatDestroy(&M3);CHKERRQ(ierr); 285 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 286 if (!pcbddc->local_auxmat1) { 287 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 288 } else { 289 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } 291 } 292 293 /* Get submatrices from subdomain matrix */ 294 if (n_vertices) { 295 PetscInt ibs,mbs; 296 PetscBool issbaij; 297 Mat newmat; 298 299 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 300 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 301 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 302 if (ibs != mbs) { /* need to convert to SEQAIJ */ 303 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 304 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 307 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 308 } else { 309 /* this is safe */ 310 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 311 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 312 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 313 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 314 /* which of the two approaches is faster? */ 315 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 316 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 317 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 319 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 320 } else { 321 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 322 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 } 324 } 325 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 326 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 327 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 328 } 329 330 /* Matrix of coarse basis functions (local) */ 331 if (pcbddc->coarse_phi_B) { 332 PetscInt on_B,on_primal; 333 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 334 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 335 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 336 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 337 } 338 } 339 if (pcbddc->coarse_phi_D) { 340 PetscInt on_D,on_primal; 341 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 342 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 343 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 344 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 345 } 346 } 347 if (!pcbddc->coarse_phi_B) { 348 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 349 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 350 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 351 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 352 } 353 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 354 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 355 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 356 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 357 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 358 } 359 360 if (pcbddc->dbg_flag) { 361 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 362 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 363 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 364 } 365 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 366 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 367 368 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 369 370 /* vertices */ 371 for (i=0;i<n_vertices;i++) { 372 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 373 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 374 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 375 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 376 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 378 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 379 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 380 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 382 if (n_constraints) { 383 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 384 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 385 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 386 } 387 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 388 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 389 390 /* Set values in coarse basis function and subdomain part of coarse_mat */ 391 /* coarse basis functions */ 392 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 393 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 394 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 396 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 397 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 399 if (pcbddc->switch_static || pcbddc->dbg_flag) { 400 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 401 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 403 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 } 406 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 407 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 408 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 409 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 if (n_constraints) { 411 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 412 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 } 415 416 /* check */ 417 if (pcbddc->dbg_flag) { 418 /* assemble subdomain vector on local nodes */ 419 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 420 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 421 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 425 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 426 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 427 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 428 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 429 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 430 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 if (n_constraints) { 432 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 433 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 434 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 } 436 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 437 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 439 /* check saddle point solution */ 440 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 441 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 443 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 444 /* shift by the identity matrix */ 445 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 446 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 447 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 449 } 450 } 451 452 /* constraints */ 453 for (i=0;i<n_constraints;i++) { 454 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 455 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 456 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 457 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 458 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 459 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 460 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 461 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 462 if (n_vertices) { 463 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 464 } 465 /* Set values in coarse basis function and subdomain part of coarse_mat */ 466 /* coarse basis functions */ 467 j = i+n_vertices; /* don't touch this! */ 468 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 469 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 472 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 473 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 if (pcbddc->switch_static || pcbddc->dbg_flag) { 475 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 476 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 478 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 479 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 } 481 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 482 if (n_vertices) { 483 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 484 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 485 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 } 487 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 488 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 489 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 491 if (pcbddc->dbg_flag) { 492 /* assemble subdomain vector on nodes */ 493 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 494 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 495 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 496 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 499 /* assemble subdomain vector of lagrange multipliers */ 500 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 501 if (n_vertices) { 502 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 503 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 504 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 } 506 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 507 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 510 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 512 /* check saddle point solution */ 513 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 514 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 516 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 517 /* shift by the identity matrix */ 518 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 519 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 520 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 522 } 523 } 524 /* call assembling routines for local coarse basis */ 525 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 526 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 if (pcbddc->switch_static || pcbddc->dbg_flag) { 528 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 } 531 532 /* compute other basis functions for non-symmetric problems */ 533 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 534 if (!setsym || (setsym && !issym)) { 535 if (!pcbddc->coarse_psi_B) { 536 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 537 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 539 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 540 } 541 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 542 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 543 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 544 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 545 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 546 } 547 for (i=0;i<pcbddc->local_primal_size;i++) { 548 if (n_constraints) { 549 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 550 for (j=0;j<n_constraints;j++) { 551 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 552 } 553 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 554 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 555 } 556 if (i<n_vertices) { 557 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 558 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 559 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 560 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 561 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 562 if (n_constraints) { 563 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 564 } 565 } else { 566 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 567 } 568 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 569 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 570 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 571 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 573 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 574 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 if (i<n_vertices) { 576 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 577 } 578 if (pcbddc->switch_static || pcbddc->dbg_flag) { 579 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 580 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 582 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 583 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 } 585 586 if (pcbddc->dbg_flag) { 587 /* assemble subdomain vector on nodes */ 588 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 589 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 590 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 591 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 if (i<n_vertices) { 593 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 594 } 595 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 596 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 597 /* assemble subdomain vector of lagrange multipliers */ 598 for (j=0;j<pcbddc->local_primal_size;j++) { 599 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 600 } 601 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 602 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 603 /* check saddle point solution */ 604 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 605 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 607 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 608 /* shift by the identity matrix */ 609 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 610 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 611 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 613 } 614 } 615 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 616 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 if (pcbddc->switch_static || pcbddc->dbg_flag) { 618 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 } 621 unsymmetric_check = PETSC_TRUE; 622 } else { /* take references to already computed coarse basis */ 623 unsymmetric_check = PETSC_FALSE; 624 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 625 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 626 if (pcbddc->coarse_phi_D) { 627 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 628 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 629 } 630 } 631 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 632 /* Checking coarse_sub_mat and coarse basis functios */ 633 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 634 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 635 if (pcbddc->dbg_flag) { 636 Mat coarse_sub_mat; 637 Mat AUXMAT,TM1,TM2,TM3,TM4; 638 Mat coarse_phi_D,coarse_phi_B; 639 Mat coarse_psi_D,coarse_psi_B; 640 Mat A_II,A_BB,A_IB,A_BI; 641 MatType checkmattype=MATSEQAIJ; 642 PetscReal real_value; 643 644 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 645 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 646 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 648 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 649 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 650 if (unsymmetric_check) { 651 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 652 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 653 } 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 655 656 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 658 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 659 if (unsymmetric_check) { 660 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 661 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 662 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 663 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 664 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 665 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 666 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 667 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 668 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 669 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 670 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 672 } else { 673 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 674 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 675 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 676 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 677 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 678 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 679 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 680 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 681 } 682 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 683 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 684 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 686 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 687 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 688 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 693 for (i=0;i<pcbddc->local_primal_size;i++) { 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 695 } 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 697 for (i=0;i<pcbddc->local_primal_size;i++) { 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 699 } 700 if (unsymmetric_check) { 701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 702 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 704 } 705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 706 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 708 } 709 } 710 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 711 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 712 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 715 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 716 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 719 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 720 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 721 if (unsymmetric_check) { 722 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 723 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 724 } 725 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 726 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 727 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 728 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 729 } 730 /* free memory */ 731 if (n_vertices) { 732 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 733 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 734 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 735 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 737 } 738 if (n_constraints) { 739 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 740 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 741 ierr = MatDestroy(&M1);CHKERRQ(ierr); 742 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 743 } 744 ierr = PetscFree(auxindices);CHKERRQ(ierr); 745 /* get back data */ 746 *coarse_submat_vals_n = coarse_submat_vals; 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 752 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 753 { 754 PC_IS* pcis = (PC_IS*)(pc->data); 755 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 756 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 757 PetscBool issbaij,isseqaij; 758 /* manage repeated solves */ 759 MatReuse reuse; 760 MatStructure matstruct; 761 PetscErrorCode ierr; 762 763 PetscFunctionBegin; 764 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 765 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 766 } 767 /* get mat flags */ 768 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 769 reuse = MAT_INITIAL_MATRIX; 770 if (pc->setupcalled) { 771 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 772 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 773 if (matstruct == SAME_NONZERO_PATTERN) { 774 reuse = MAT_REUSE_MATRIX; 775 } else { 776 reuse = MAT_INITIAL_MATRIX; 777 } 778 } 779 if (reuse == MAT_INITIAL_MATRIX) { 780 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 783 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 784 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 785 } 786 787 /* transform local matrices if needed */ 788 if (pcbddc->use_change_of_basis) { 789 Mat change_mat_all; 790 PetscScalar *row_cmat_values; 791 PetscInt *row_cmat_indices; 792 PetscInt *nnz,*is_indices,*temp_indices; 793 PetscInt i,j,k,n_D,n_B; 794 795 /* Get Non-overlapping dimensions */ 796 n_B = pcis->n_B; 797 n_D = pcis->n-n_B; 798 799 /* compute nonzero structure of change of basis on all local nodes */ 800 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 801 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 803 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 804 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 805 k=1; 806 for (i=0;i<n_B;i++) { 807 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 808 nnz[is_indices[i]]=j; 809 if (k < j) k = j; 810 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 811 } 812 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 813 /* assemble change of basis matrix on the whole set of local dofs */ 814 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 815 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 816 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 817 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 818 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 819 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 820 for (i=0;i<n_D;i++) { 821 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 822 } 823 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 824 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 825 for (i=0;i<n_B;i++) { 826 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 827 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 828 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 829 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 830 } 831 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 832 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 833 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 834 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 835 if (isseqaij) { 836 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 837 } else { 838 Mat work_mat; 839 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 840 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 841 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 842 } 843 /* 844 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 845 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 846 */ 847 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 848 ierr = PetscFree(nnz);CHKERRQ(ierr); 849 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 850 } else { 851 /* without change of basis, the local matrix is unchanged */ 852 if (!pcbddc->local_mat) { 853 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 854 pcbddc->local_mat = matis->A; 855 } 856 } 857 858 /* get submatrices */ 859 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 860 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 861 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 862 if (!issbaij) { 863 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 864 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 865 } else { 866 Mat newmat; 867 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 868 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 869 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 870 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 871 } 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 877 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 878 { 879 PC_IS* pcis = (PC_IS*)(pc->data); 880 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 881 IS is_aux1,is_aux2; 882 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 883 PetscInt n_vertices,i,j,n_R,n_D,n_B; 884 PetscInt vbs,bs; 885 PetscBT bitmask; 886 PetscErrorCode ierr; 887 888 PetscFunctionBegin; 889 /* 890 No need to setup local scatters if 891 - primal space is unchanged 892 AND 893 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 894 AND 895 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 896 */ 897 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 898 PetscFunctionReturn(0); 899 } 900 /* destroy old objects */ 901 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 902 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 903 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 904 /* Set Non-overlapping dimensions */ 905 n_B = pcis->n_B; n_D = pcis->n - n_B; 906 n_vertices = pcbddc->n_actual_vertices; 907 /* create auxiliary bitmask */ 908 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 909 for (i=0;i<n_vertices;i++) { 910 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 911 } 912 913 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 914 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 915 for (i=0, n_R=0; i<pcis->n; i++) { 916 if (!PetscBTLookup(bitmask,i)) { 917 idx_R_local[n_R] = i; 918 n_R++; 919 } 920 } 921 922 /* Block code */ 923 vbs = 1; 924 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 925 if (bs>1 && !(n_vertices%bs)) { 926 PetscBool is_blocked = PETSC_TRUE; 927 PetscInt *vary; 928 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 929 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 930 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 931 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 932 for (i=0; i<n_vertices; i++) { 933 if (vary[i]!=0 && vary[i]!=bs) { 934 is_blocked = PETSC_FALSE; 935 break; 936 } 937 } 938 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 939 vbs = bs; 940 for (i=0;i<n_R/vbs;i++) { 941 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 942 } 943 } 944 ierr = PetscFree(vary);CHKERRQ(ierr); 945 } 946 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 947 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 948 949 /* print some info if requested */ 950 if (pcbddc->dbg_flag) { 951 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 952 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 953 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 958 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 959 } 960 961 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 962 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 963 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 964 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 965 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 966 for (i=0; i<n_D; i++) { 967 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 968 } 969 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 970 for (i=0, j=0; i<n_R; i++) { 971 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 972 aux_array1[j++] = i; 973 } 974 } 975 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 976 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 977 for (i=0, j=0; i<n_B; i++) { 978 if (!PetscBTLookup(bitmask,is_indices[i])) { 979 aux_array2[j++] = i; 980 } 981 } 982 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 983 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 984 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 985 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 986 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 987 988 if (pcbddc->switch_static || pcbddc->dbg_flag) { 989 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 990 for (i=0, j=0; i<n_R; i++) { 991 if (PetscBTLookup(bitmask,idx_R_local[i])) { 992 aux_array1[j++] = i; 993 } 994 } 995 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 996 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 997 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 998 } 999 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1000 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1007 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1008 { 1009 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1010 PC_IS *pcis = (PC_IS*)pc->data; 1011 PC pc_temp; 1012 Mat A_RR; 1013 MatStructure matstruct; 1014 MatReuse reuse; 1015 PetscScalar m_one = -1.0; 1016 PetscReal value; 1017 PetscInt n_D,n_R,ibs,mbs; 1018 PetscBool use_exact,use_exact_reduced,issbaij; 1019 PetscErrorCode ierr; 1020 /* prefixes stuff */ 1021 char dir_prefix[256],neu_prefix[256],str_level[16]; 1022 size_t len; 1023 1024 PetscFunctionBegin; 1025 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1026 1027 /* compute prefixes */ 1028 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1029 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1030 if (!pcbddc->current_level) { 1031 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1032 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1033 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1034 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1035 } else { 1036 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1037 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1038 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1039 len -= 15; /* remove "pc_bddc_coarse_" */ 1040 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1041 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1042 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1043 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1044 *(dir_prefix+len)='\0'; 1045 *(neu_prefix+len)='\0'; 1046 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1047 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1048 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1049 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1050 } 1051 1052 /* DIRICHLET PROBLEM */ 1053 /* Matrix for Dirichlet problem is pcis->A_II */ 1054 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1055 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1056 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1057 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1058 /* default */ 1059 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1060 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1061 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1062 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1063 if (issbaij) { 1064 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1065 } else { 1066 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1067 } 1068 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1069 /* Allow user's customization */ 1070 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1071 } 1072 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1073 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1074 if (!n_D) { 1075 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1076 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1077 } 1078 /* Set Up KSP for Dirichlet problem of BDDC */ 1079 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1080 /* set ksp_D into pcis data */ 1081 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1082 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1083 pcis->ksp_D = pcbddc->ksp_D; 1084 1085 /* NEUMANN PROBLEM */ 1086 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1087 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1088 if (pcbddc->ksp_R) { /* already created ksp */ 1089 PetscInt nn_R; 1090 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1091 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1092 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1093 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1094 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1095 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1096 reuse = MAT_INITIAL_MATRIX; 1097 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1098 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1099 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1100 reuse = MAT_INITIAL_MATRIX; 1101 } else { /* safe to reuse the matrix */ 1102 reuse = MAT_REUSE_MATRIX; 1103 } 1104 } 1105 /* last check */ 1106 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1107 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1108 reuse = MAT_INITIAL_MATRIX; 1109 } 1110 } else { /* first time, so we need to create the matrix */ 1111 reuse = MAT_INITIAL_MATRIX; 1112 } 1113 /* extract A_RR */ 1114 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1115 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1116 if (ibs != mbs) { 1117 Mat newmat; 1118 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1119 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1120 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1121 } else { 1122 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1123 } 1124 if (!pcbddc->ksp_R) { /* create object if not present */ 1125 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1126 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1127 /* default */ 1128 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1129 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1130 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1131 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1132 if (issbaij) { 1133 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1134 } else { 1135 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1136 } 1137 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1138 /* Allow user's customization */ 1139 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1140 } 1141 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1142 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1143 if (!n_R) { 1144 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1145 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1146 } 1147 /* Set Up KSP for Neumann problem of BDDC */ 1148 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1149 1150 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1151 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1152 /* Dirichlet */ 1153 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1154 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1155 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1156 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1157 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1158 /* need to be adapted? */ 1159 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1160 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1161 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1162 /* print info */ 1163 if (pcbddc->dbg_flag) { 1164 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1165 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1166 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1168 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1169 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1170 } 1171 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1172 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1173 } 1174 1175 /* Neumann */ 1176 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1177 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1178 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1179 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1180 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1181 /* need to be adapted? */ 1182 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1183 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1184 /* print info */ 1185 if (pcbddc->dbg_flag) { 1186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1187 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1188 } 1189 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1190 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1191 } 1192 } 1193 /* free Neumann problem's matrix */ 1194 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1200 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1201 { 1202 PetscErrorCode ierr; 1203 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1204 1205 PetscFunctionBegin; 1206 if (applytranspose) { 1207 if (pcbddc->local_auxmat1) { 1208 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1209 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1210 } 1211 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1212 } else { 1213 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1214 if (pcbddc->local_auxmat1) { 1215 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1216 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1217 } 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1225 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1226 { 1227 PetscErrorCode ierr; 1228 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1229 PC_IS* pcis = (PC_IS*) (pc->data); 1230 const PetscScalar zero = 0.0; 1231 1232 PetscFunctionBegin; 1233 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1234 if (applytranspose) { 1235 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1236 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1237 } else { 1238 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1239 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1240 } 1241 /* Scatter data of coarse_rhs */ 1242 ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 1243 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 1245 /* Local solution on R nodes */ 1246 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1247 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1248 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1249 if (pcbddc->switch_static) { 1250 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1251 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1252 } 1253 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1254 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1255 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1256 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1257 if (pcbddc->switch_static) { 1258 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1259 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 } 1261 1262 /* Coarse solution */ 1263 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 /* TODO remove null space when doing multilevel */ 1265 if (applytranspose) { 1266 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1267 } else { 1268 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1269 } 1270 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1271 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1272 1273 /* Sum contributions from two levels */ 1274 if (applytranspose) { 1275 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1276 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1277 } else { 1278 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1279 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1286 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1287 { 1288 PetscErrorCode ierr; 1289 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1290 1291 PetscFunctionBegin; 1292 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1298 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1299 { 1300 PetscErrorCode ierr; 1301 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1302 1303 PetscFunctionBegin; 1304 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 /* uncomment for testing purposes */ 1309 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1312 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1313 { 1314 PetscErrorCode ierr; 1315 PC_IS* pcis = (PC_IS*)(pc->data); 1316 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1317 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1318 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1319 MatType impMatType=MATSEQAIJ; 1320 /* one and zero */ 1321 PetscScalar one=1.0,zero=0.0; 1322 /* space to store constraints and their local indices */ 1323 PetscScalar *temp_quadrature_constraint; 1324 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1325 /* iterators */ 1326 PetscInt i,j,k,total_counts,temp_start_ptr; 1327 /* stuff to store connected components stored in pcbddc->mat_graph */ 1328 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1329 PetscInt n_ISForFaces,n_ISForEdges; 1330 /* near null space stuff */ 1331 MatNullSpace nearnullsp; 1332 const Vec *nearnullvecs; 1333 Vec *localnearnullsp; 1334 PetscBool nnsp_has_cnst; 1335 PetscInt nnsp_size; 1336 PetscScalar *array; 1337 /* BLAS integers */ 1338 PetscBLASInt lwork,lierr; 1339 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1340 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1341 /* LAPACK working arrays for SVD or POD */ 1342 PetscBool skip_lapack; 1343 PetscScalar *work; 1344 PetscReal *singular_vals; 1345 #if defined(PETSC_USE_COMPLEX) 1346 PetscReal *rwork; 1347 #endif 1348 #if defined(PETSC_MISSING_LAPACK_GESVD) 1349 PetscBLASInt Blas_one_2=1; 1350 PetscScalar *temp_basis,*correlation_mat; 1351 #else 1352 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1353 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1354 #endif 1355 /* reuse */ 1356 PetscInt olocal_primal_size; 1357 PetscInt *oprimal_indices_local_idxs; 1358 /* change of basis */ 1359 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1360 PetscBool boolforchange,qr_needed; 1361 PetscBT touched,change_basis,qr_needed_idx; 1362 /* auxiliary stuff */ 1363 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1364 /* some quantities */ 1365 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1366 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1367 1368 1369 PetscFunctionBegin; 1370 /* Destroy Mat objects computed previously */ 1371 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1372 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1373 /* Get index sets for faces, edges and vertices from graph */ 1374 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1375 pcbddc->use_vertices = PETSC_TRUE; 1376 } 1377 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1378 /* HACK: provide functions to set change of basis */ 1379 if (!ISForVertices && pcbddc->NullSpace) { 1380 pcbddc->use_change_of_basis = PETSC_TRUE; 1381 pcbddc->use_change_on_faces = PETSC_FALSE; 1382 } 1383 /* print some info */ 1384 if (pcbddc->dbg_flag) { 1385 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1386 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1387 i = 0; 1388 if (ISForVertices) { 1389 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1390 } 1391 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1392 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1393 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1394 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1395 } 1396 /* check if near null space is attached to global mat */ 1397 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1398 if (nearnullsp) { 1399 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1400 /* remove any stored info */ 1401 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1402 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1403 /* store information for BDDC solver reuse */ 1404 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1405 pcbddc->onearnullspace = nearnullsp; 1406 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1407 for (i=0;i<nnsp_size;i++) { 1408 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1409 } 1410 } else { /* if near null space is not provided BDDC uses constants by default */ 1411 nnsp_size = 0; 1412 nnsp_has_cnst = PETSC_TRUE; 1413 } 1414 /* get max number of constraints on a single cc */ 1415 max_constraints = nnsp_size; 1416 if (nnsp_has_cnst) max_constraints++; 1417 1418 /* 1419 Evaluate maximum storage size needed by the procedure 1420 - temp_indices will contain start index of each constraint stored as follows 1421 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1422 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1423 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1424 */ 1425 total_counts = n_ISForFaces+n_ISForEdges; 1426 total_counts *= max_constraints; 1427 n_vertices = 0; 1428 if (ISForVertices) { 1429 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1430 } 1431 total_counts += n_vertices; 1432 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1433 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1434 total_counts = 0; 1435 max_size_of_constraint = 0; 1436 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1437 if (i<n_ISForEdges) { 1438 used_IS = &ISForEdges[i]; 1439 } else { 1440 used_IS = &ISForFaces[i-n_ISForEdges]; 1441 } 1442 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1443 total_counts += j; 1444 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1445 } 1446 total_counts *= max_constraints; 1447 total_counts += n_vertices; 1448 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1449 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1450 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1451 /* get local part of global near null space vectors */ 1452 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1453 for (k=0;k<nnsp_size;k++) { 1454 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1455 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1456 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 } 1458 1459 /* whether or not to skip lapack calls */ 1460 skip_lapack = PETSC_TRUE; 1461 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1462 1463 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1464 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1465 PetscScalar temp_work; 1466 #if defined(PETSC_MISSING_LAPACK_GESVD) 1467 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1468 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1469 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1470 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1471 #if defined(PETSC_USE_COMPLEX) 1472 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1473 #endif 1474 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1475 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1476 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1477 lwork = -1; 1478 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1479 #if !defined(PETSC_USE_COMPLEX) 1480 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1481 #else 1482 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1483 #endif 1484 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1485 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1486 #else /* on missing GESVD */ 1487 /* SVD */ 1488 PetscInt max_n,min_n; 1489 max_n = max_size_of_constraint; 1490 min_n = max_constraints; 1491 if (max_size_of_constraint < max_constraints) { 1492 min_n = max_size_of_constraint; 1493 max_n = max_constraints; 1494 } 1495 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1496 #if defined(PETSC_USE_COMPLEX) 1497 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1498 #endif 1499 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1500 lwork = -1; 1501 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1502 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1503 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1504 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1505 #if !defined(PETSC_USE_COMPLEX) 1506 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1507 #else 1508 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1509 #endif 1510 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1511 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1512 #endif /* on missing GESVD */ 1513 /* Allocate optimal workspace */ 1514 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1515 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1516 } 1517 /* Now we can loop on constraining sets */ 1518 total_counts = 0; 1519 temp_indices[0] = 0; 1520 /* vertices */ 1521 if (ISForVertices) { 1522 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1523 if (nnsp_has_cnst) { /* consider all vertices */ 1524 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1525 for (i=0;i<n_vertices;i++) { 1526 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1527 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1528 total_counts++; 1529 } 1530 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1531 PetscBool used_vertex; 1532 for (i=0;i<n_vertices;i++) { 1533 used_vertex = PETSC_FALSE; 1534 k = 0; 1535 while (!used_vertex && k<nnsp_size) { 1536 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1537 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1538 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1539 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1540 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1541 total_counts++; 1542 used_vertex = PETSC_TRUE; 1543 } 1544 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1545 k++; 1546 } 1547 } 1548 } 1549 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1550 n_vertices = total_counts; 1551 } 1552 1553 /* edges and faces */ 1554 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1555 if (i<n_ISForEdges) { 1556 used_IS = &ISForEdges[i]; 1557 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1558 } else { 1559 used_IS = &ISForFaces[i-n_ISForEdges]; 1560 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1561 } 1562 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1563 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1564 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1565 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1566 /* change of basis should not be performed on local periodic nodes */ 1567 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1568 if (nnsp_has_cnst) { 1569 PetscScalar quad_value; 1570 temp_constraints++; 1571 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1572 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1573 for (j=0;j<size_of_constraint;j++) { 1574 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1575 } 1576 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1577 total_counts++; 1578 } 1579 for (k=0;k<nnsp_size;k++) { 1580 PetscReal real_value; 1581 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1582 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1583 for (j=0;j<size_of_constraint;j++) { 1584 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1585 } 1586 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1587 /* check if array is null on the connected component */ 1588 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1589 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1590 if (real_value > 0.0) { /* keep indices and values */ 1591 temp_constraints++; 1592 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1593 total_counts++; 1594 } 1595 } 1596 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1597 valid_constraints = temp_constraints; 1598 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1599 if (!pcbddc->use_nnsp_true && temp_constraints) { 1600 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1601 1602 #if defined(PETSC_MISSING_LAPACK_GESVD) 1603 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1604 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1605 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1606 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1607 from that computed using LAPACKgesvd 1608 -> This is due to a different computation of eigenvectors in LAPACKheev 1609 -> The quality of the POD-computed basis will be the same */ 1610 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1611 /* Store upper triangular part of correlation matrix */ 1612 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1613 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1614 for (j=0;j<temp_constraints;j++) { 1615 for (k=0;k<j+1;k++) { 1616 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1617 } 1618 } 1619 /* compute eigenvalues and eigenvectors of correlation matrix */ 1620 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1621 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1622 #if !defined(PETSC_USE_COMPLEX) 1623 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1624 #else 1625 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1626 #endif 1627 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1628 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1629 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1630 j=0; 1631 while (j < temp_constraints && singular_vals[j] < tol) j++; 1632 total_counts=total_counts-j; 1633 valid_constraints = temp_constraints-j; 1634 /* scale and copy POD basis into used quadrature memory */ 1635 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1636 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1637 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1638 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1639 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1640 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1641 if (j<temp_constraints) { 1642 PetscInt ii; 1643 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1644 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1645 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1646 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1647 for (k=0;k<temp_constraints-j;k++) { 1648 for (ii=0;ii<size_of_constraint;ii++) { 1649 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1650 } 1651 } 1652 } 1653 #else /* on missing GESVD */ 1654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1655 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1656 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1657 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1658 #if !defined(PETSC_USE_COMPLEX) 1659 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1660 #else 1661 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1662 #endif 1663 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1664 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1665 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1666 k = temp_constraints; 1667 if (k > size_of_constraint) k = size_of_constraint; 1668 j = 0; 1669 while (j < k && singular_vals[k-j-1] < tol) j++; 1670 total_counts = total_counts-temp_constraints+k-j; 1671 valid_constraints = k-j; 1672 #endif /* on missing GESVD */ 1673 } 1674 /* setting change_of_basis flag is safe now */ 1675 if (boolforchange) { 1676 for (j=0;j<valid_constraints;j++) { 1677 PetscBTSet(change_basis,total_counts-j-1); 1678 } 1679 } 1680 } 1681 /* free index sets of faces, edges and vertices */ 1682 for (i=0;i<n_ISForFaces;i++) { 1683 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1684 } 1685 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1686 for (i=0;i<n_ISForEdges;i++) { 1687 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1688 } 1689 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1690 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1691 /* map temp_indices_to_constraint in boundary numbering */ 1692 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1693 if (i != temp_indices[total_counts]) { 1694 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1695 } 1696 1697 /* free workspace */ 1698 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1699 ierr = PetscFree(work);CHKERRQ(ierr); 1700 #if defined(PETSC_USE_COMPLEX) 1701 ierr = PetscFree(rwork);CHKERRQ(ierr); 1702 #endif 1703 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1704 #if defined(PETSC_MISSING_LAPACK_GESVD) 1705 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1706 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1707 #endif 1708 } 1709 for (k=0;k<nnsp_size;k++) { 1710 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1711 } 1712 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1713 1714 /* set quantities in pcbddc data structure and store previous primal size */ 1715 /* n_vertices defines the number of subdomain corners in the primal space */ 1716 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1717 olocal_primal_size = pcbddc->local_primal_size; 1718 pcbddc->local_primal_size = total_counts; 1719 pcbddc->n_vertices = n_vertices; 1720 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1721 1722 /* Create constraint matrix */ 1723 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1724 /* so we need to set it up properly either with or without change of basis */ 1725 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1726 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1727 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1728 /* array to compute a local numbering of constraints : vertices first then constraints */ 1729 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1730 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1731 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1732 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1733 /* auxiliary stuff for basis change */ 1734 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1735 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1736 1737 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1738 total_primal_vertices=0; 1739 for (i=0;i<pcbddc->local_primal_size;i++) { 1740 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1741 if (size_of_constraint == 1) { 1742 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1743 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1744 aux_primal_minloc[total_primal_vertices]=0; 1745 total_primal_vertices++; 1746 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1747 PetscInt min_loc,min_index; 1748 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1749 /* find first untouched local node */ 1750 k = 0; 1751 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1752 min_index = global_indices[k]; 1753 min_loc = k; 1754 /* search the minimum among global nodes already untouched on the cc */ 1755 for (k=1;k<size_of_constraint;k++) { 1756 /* there can be more than one constraint on a single connected component */ 1757 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1758 min_index = global_indices[k]; 1759 min_loc = k; 1760 } 1761 } 1762 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1763 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1764 aux_primal_minloc[total_primal_vertices]=min_loc; 1765 total_primal_vertices++; 1766 } 1767 } 1768 /* determine if a QR strategy is needed for change of basis */ 1769 qr_needed = PETSC_FALSE; 1770 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1771 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1772 if (PetscBTLookup(change_basis,i)) { 1773 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1774 j = 0; 1775 for (k=0;k<size_of_constraint;k++) { 1776 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1777 j++; 1778 } 1779 } 1780 /* found more than one primal dof on the cc */ 1781 if (j > 1) { 1782 PetscBTSet(qr_needed_idx,i); 1783 qr_needed = PETSC_TRUE; 1784 } 1785 } 1786 } 1787 /* free workspace */ 1788 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1789 1790 /* permute indices in order to have a sorted set of vertices */ 1791 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1792 1793 /* nonzero structure of constraint matrix */ 1794 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1795 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1796 j=total_primal_vertices; 1797 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1798 if (!PetscBTLookup(change_basis,i)) { 1799 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1800 j++; 1801 } 1802 } 1803 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1804 ierr = PetscFree(nnz);CHKERRQ(ierr); 1805 /* set values in constraint matrix */ 1806 for (i=0;i<total_primal_vertices;i++) { 1807 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1808 } 1809 total_counts = total_primal_vertices; 1810 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1811 if (!PetscBTLookup(change_basis,i)) { 1812 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1813 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1814 total_counts++; 1815 } 1816 } 1817 /* assembling */ 1818 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1819 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1820 /* 1821 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1822 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1823 */ 1824 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1825 if (pcbddc->use_change_of_basis) { 1826 /* dual and primal dofs on a single cc */ 1827 PetscInt dual_dofs,primal_dofs; 1828 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1829 PetscInt primal_counter; 1830 /* working stuff for GEQRF */ 1831 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1832 PetscBLASInt lqr_work; 1833 /* working stuff for UNGQR */ 1834 PetscScalar *gqr_work,lgqr_work_t; 1835 PetscBLASInt lgqr_work; 1836 /* working stuff for TRTRS */ 1837 PetscScalar *trs_rhs; 1838 PetscBLASInt Blas_NRHS; 1839 /* pointers for values insertion into change of basis matrix */ 1840 PetscInt *start_rows,*start_cols; 1841 PetscScalar *start_vals; 1842 /* working stuff for values insertion */ 1843 PetscBT is_primal; 1844 1845 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1846 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1847 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1848 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1849 /* work arrays */ 1850 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1851 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1852 /* nonzeros per row */ 1853 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1854 if (PetscBTLookup(change_basis,i)) { 1855 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1856 if (PetscBTLookup(qr_needed_idx,i)) { 1857 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1858 } else { 1859 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1860 /* get local primal index on the cc */ 1861 j = 0; 1862 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1863 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1864 } 1865 } 1866 } 1867 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1868 ierr = PetscFree(nnz);CHKERRQ(ierr); 1869 /* Set initial identity in the matrix */ 1870 for (i=0;i<pcis->n_B;i++) { 1871 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1872 } 1873 1874 if (pcbddc->dbg_flag) { 1875 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1876 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1877 } 1878 1879 1880 /* Now we loop on the constraints which need a change of basis */ 1881 /* 1882 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1883 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1884 1885 Basic blocks of change of basis matrix T computed 1886 1887 - Using the following block transformation if there is only a primal dof on the cc 1888 (in the example, primal dof is the last one of the edge in LOCAL ordering 1889 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1890 | 1 0 ... 0 1 | 1891 | 0 1 ... 0 1 | 1892 | ... | 1893 | 0 ... 1 1 | 1894 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1895 1896 - via QR decomposition of constraints otherwise 1897 */ 1898 if (qr_needed) { 1899 /* space to store Q */ 1900 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1901 /* first we issue queries for optimal work */ 1902 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1903 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1904 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1905 lqr_work = -1; 1906 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1907 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1908 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1909 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1910 lgqr_work = -1; 1911 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1912 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1913 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1914 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1915 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1916 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1917 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1918 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1919 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1920 /* array to store scaling factors for reflectors */ 1921 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1922 /* array to store rhs and solution of triangular solver */ 1923 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1924 /* allocating workspace for check */ 1925 if (pcbddc->dbg_flag) { 1926 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1927 } 1928 } 1929 /* array to store whether a node is primal or not */ 1930 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1931 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1932 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1933 if (i != total_primal_vertices) { 1934 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1935 } 1936 for (i=0;i<total_primal_vertices;i++) { 1937 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1938 } 1939 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1940 1941 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1942 /* -> using implicit ordering contained in temp_indices data */ 1943 total_counts = pcbddc->n_vertices; 1944 primal_counter = total_counts; 1945 while (total_counts<pcbddc->local_primal_size) { 1946 primal_dofs = 1; 1947 if (PetscBTLookup(change_basis,total_counts)) { 1948 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1949 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1950 primal_dofs++; 1951 } 1952 /* get constraint info */ 1953 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1954 dual_dofs = size_of_constraint-primal_dofs; 1955 1956 if (pcbddc->dbg_flag) { 1957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1958 } 1959 1960 if (primal_dofs > 1) { /* QR */ 1961 1962 /* copy quadrature constraints for change of basis check */ 1963 if (pcbddc->dbg_flag) { 1964 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1965 } 1966 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1967 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1968 1969 /* compute QR decomposition of constraints */ 1970 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1971 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1972 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1973 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1974 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1975 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1976 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1977 1978 /* explictly compute R^-T */ 1979 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1980 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1981 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1982 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1983 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1984 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1985 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1986 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1987 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1988 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1989 1990 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1991 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1993 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1994 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1995 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1996 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1997 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1998 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1999 2000 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2001 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2002 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2003 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2004 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2005 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2006 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2007 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2008 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2009 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2010 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2011 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2012 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2013 2014 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2015 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2016 /* insert cols for primal dofs */ 2017 for (j=0;j<primal_dofs;j++) { 2018 start_vals = &qr_basis[j*size_of_constraint]; 2019 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2020 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2021 } 2022 /* insert cols for dual dofs */ 2023 for (j=0,k=0;j<dual_dofs;k++) { 2024 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2025 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2026 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2027 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2028 j++; 2029 } 2030 } 2031 2032 /* check change of basis */ 2033 if (pcbddc->dbg_flag) { 2034 PetscInt ii,jj; 2035 PetscBool valid_qr=PETSC_TRUE; 2036 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2037 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2039 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2040 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2041 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2042 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2043 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2044 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2045 for (jj=0;jj<size_of_constraint;jj++) { 2046 for (ii=0;ii<primal_dofs;ii++) { 2047 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2048 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2049 } 2050 } 2051 if (!valid_qr) { 2052 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2053 for (jj=0;jj<size_of_constraint;jj++) { 2054 for (ii=0;ii<primal_dofs;ii++) { 2055 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2056 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2057 } 2058 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2059 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2060 } 2061 } 2062 } 2063 } else { 2064 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2065 } 2066 } 2067 } else { /* simple transformation block */ 2068 PetscInt row,col; 2069 PetscScalar val; 2070 for (j=0;j<size_of_constraint;j++) { 2071 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2072 if (!PetscBTLookup(is_primal,row)) { 2073 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2074 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2075 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2076 } else { 2077 for (k=0;k<size_of_constraint;k++) { 2078 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2079 if (row != col) { 2080 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2081 } else { 2082 val = 1.0; 2083 } 2084 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2085 } 2086 } 2087 } 2088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2089 } 2090 /* increment primal counter */ 2091 primal_counter += primal_dofs; 2092 } else { 2093 if (pcbddc->dbg_flag) { 2094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2095 } 2096 } 2097 /* increment constraint counter total_counts */ 2098 total_counts += primal_dofs; 2099 } 2100 2101 /* free workspace */ 2102 if (qr_needed) { 2103 if (pcbddc->dbg_flag) { 2104 ierr = PetscFree(work);CHKERRQ(ierr); 2105 } 2106 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2107 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2108 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2109 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2110 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2111 } 2112 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2113 /* assembling */ 2114 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2115 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2116 /* 2117 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2118 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2119 */ 2120 } 2121 2122 /* get indices in local ordering for vertices and constraints */ 2123 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2124 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2125 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2126 } 2127 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2128 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2129 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2130 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2131 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2132 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2133 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2134 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2135 /* set quantities in PCBDDC data struct */ 2136 pcbddc->n_actual_vertices = i; 2137 /* check if a new primal space has been introduced */ 2138 pcbddc->new_primal_space_local = PETSC_TRUE; 2139 if (olocal_primal_size == pcbddc->local_primal_size) { 2140 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2141 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2142 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2143 } 2144 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2145 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2146 2147 /* flush dbg viewer */ 2148 if (pcbddc->dbg_flag) { 2149 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2150 } 2151 2152 /* free workspace */ 2153 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2154 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2155 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2156 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2157 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2158 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2159 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2160 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2161 PetscFunctionReturn(0); 2162 } 2163 2164 #undef __FUNCT__ 2165 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2166 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2167 { 2168 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2169 PC_IS *pcis = (PC_IS*)pc->data; 2170 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2171 PetscInt ierr,i,vertex_size; 2172 PetscViewer viewer=pcbddc->dbg_viewer; 2173 2174 PetscFunctionBegin; 2175 /* Reset previously computed graph */ 2176 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2177 /* Init local Graph struct */ 2178 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2179 2180 /* Check validity of the csr graph passed in by the user */ 2181 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2182 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2183 } 2184 2185 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2186 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2187 Mat mat_adj; 2188 const PetscInt *xadj,*adjncy; 2189 PetscBool flg_row=PETSC_TRUE; 2190 2191 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2192 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2193 if (!flg_row) { 2194 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2195 } 2196 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2197 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2198 if (!flg_row) { 2199 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2200 } 2201 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2202 } 2203 2204 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2205 vertex_size = 1; 2206 if (pcbddc->user_provided_isfordofs) { 2207 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2208 ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2209 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2210 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2211 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2212 } 2213 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2214 pcbddc->n_ISForDofs = 0; 2215 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2216 } 2217 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2218 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2219 } else { 2220 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2221 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2222 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2223 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2224 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2225 } 2226 } 2227 } 2228 2229 /* Setup of Graph */ 2230 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2231 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2232 } 2233 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2234 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2235 } 2236 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2237 2238 /* Graph's connected components analysis */ 2239 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2240 2241 /* print some info to stdout */ 2242 if (pcbddc->dbg_flag) { 2243 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2244 } 2245 2246 /* mark topography has done */ 2247 pcbddc->recompute_topography = PETSC_FALSE; 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2253 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2254 { 2255 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2256 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2257 PetscErrorCode ierr; 2258 2259 PetscFunctionBegin; 2260 n = 0; 2261 vertices = 0; 2262 if (pcbddc->ConstraintMatrix) { 2263 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2264 for (i=0;i<local_primal_size;i++) { 2265 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2266 if (size_of_constraint == 1) n++; 2267 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2268 } 2269 if (vertices_idx) { 2270 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2271 n = 0; 2272 for (i=0;i<local_primal_size;i++) { 2273 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2274 if (size_of_constraint == 1) { 2275 vertices[n++]=row_cmat_indices[0]; 2276 } 2277 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2278 } 2279 } 2280 } 2281 *n_vertices = n; 2282 if (vertices_idx) *vertices_idx = vertices; 2283 PetscFunctionReturn(0); 2284 } 2285 2286 #undef __FUNCT__ 2287 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2288 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2289 { 2290 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2291 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2292 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2293 PetscBT touched; 2294 PetscErrorCode ierr; 2295 2296 /* This function assumes that the number of local constraints per connected component 2297 is not greater than the number of nodes defined for the connected component 2298 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2299 PetscFunctionBegin; 2300 n = 0; 2301 constraints_index = 0; 2302 if (pcbddc->ConstraintMatrix) { 2303 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2304 max_size_of_constraint = 0; 2305 for (i=0;i<local_primal_size;i++) { 2306 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2307 if (size_of_constraint > 1) { 2308 n++; 2309 } 2310 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2311 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2312 } 2313 if (constraints_idx) { 2314 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2315 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2316 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2317 n = 0; 2318 for (i=0;i<local_primal_size;i++) { 2319 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2320 if (size_of_constraint > 1) { 2321 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2322 /* find first untouched local node */ 2323 j = 0; 2324 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2325 min_index = row_cmat_global_indices[j]; 2326 min_loc = j; 2327 /* search the minimum among nodes not yet touched on the connected component 2328 since there can be more than one constraint on a single cc */ 2329 for (j=1;j<size_of_constraint;j++) { 2330 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2331 min_index = row_cmat_global_indices[j]; 2332 min_loc = j; 2333 } 2334 } 2335 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2336 constraints_index[n++] = row_cmat_indices[min_loc]; 2337 } 2338 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2339 } 2340 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2341 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2342 } 2343 } 2344 *n_constraints = n; 2345 if (constraints_idx) *constraints_idx = constraints_index; 2346 PetscFunctionReturn(0); 2347 } 2348 2349 /* the next two functions has been adapted from pcis.c */ 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "PCBDDCApplySchur" 2352 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2353 { 2354 PetscErrorCode ierr; 2355 PC_IS *pcis = (PC_IS*)(pc->data); 2356 2357 PetscFunctionBegin; 2358 if (!vec2_B) { vec2_B = v; } 2359 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2360 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2361 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2362 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2363 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2364 PetscFunctionReturn(0); 2365 } 2366 2367 #undef __FUNCT__ 2368 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2369 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2370 { 2371 PetscErrorCode ierr; 2372 PC_IS *pcis = (PC_IS*)(pc->data); 2373 2374 PetscFunctionBegin; 2375 if (!vec2_B) { vec2_B = v; } 2376 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2377 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2378 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2379 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2380 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "PCBDDCSubsetNumbering" 2386 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2387 { 2388 Vec local_vec,global_vec; 2389 IS seqis,paris; 2390 VecScatter scatter_ctx; 2391 PetscScalar *array; 2392 PetscInt *temp_global_dofs; 2393 PetscScalar globalsum; 2394 PetscInt i,j,s; 2395 PetscInt nlocals,first_index,old_index,max_local; 2396 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2397 PetscMPIInt *dof_sizes,*dof_displs; 2398 PetscBool first_found; 2399 PetscErrorCode ierr; 2400 2401 PetscFunctionBegin; 2402 /* mpi buffers */ 2403 MPI_Comm_size(comm,&size_prec_comm); 2404 MPI_Comm_rank(comm,&rank_prec_comm); 2405 j = ( !rank_prec_comm ? size_prec_comm : 0); 2406 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2407 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2408 /* get maximum size of subset */ 2409 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2410 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2411 max_local = 0; 2412 if (n_local_dofs) { 2413 max_local = temp_global_dofs[0]; 2414 for (i=1;i<n_local_dofs;i++) { 2415 if (max_local < temp_global_dofs[i] ) { 2416 max_local = temp_global_dofs[i]; 2417 } 2418 } 2419 } 2420 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2421 max_global++; 2422 max_local = 0; 2423 if (n_local_dofs) { 2424 max_local = local_dofs[0]; 2425 for (i=1;i<n_local_dofs;i++) { 2426 if (max_local < local_dofs[i] ) { 2427 max_local = local_dofs[i]; 2428 } 2429 } 2430 } 2431 max_local++; 2432 /* allocate workspace */ 2433 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2434 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2435 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2436 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2437 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2438 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2439 /* create scatter */ 2440 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2441 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2442 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2443 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2444 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2445 /* init array */ 2446 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2447 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2448 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2449 if (local_dofs_mult) { 2450 for (i=0;i<n_local_dofs;i++) { 2451 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2452 } 2453 } else { 2454 for (i=0;i<n_local_dofs;i++) { 2455 array[local_dofs[i]]=1.0; 2456 } 2457 } 2458 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2459 /* scatter into global vec and get total number of global dofs */ 2460 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2461 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2462 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2463 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2464 /* Fill global_vec with cumulative function for global numbering */ 2465 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2466 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2467 nlocals = 0; 2468 first_index = -1; 2469 first_found = PETSC_FALSE; 2470 for (i=0;i<s;i++) { 2471 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2472 first_found = PETSC_TRUE; 2473 first_index = i; 2474 } 2475 nlocals += (PetscInt)PetscRealPart(array[i]); 2476 } 2477 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2478 if (!rank_prec_comm) { 2479 dof_displs[0]=0; 2480 for (i=1;i<size_prec_comm;i++) { 2481 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2482 } 2483 } 2484 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2485 if (first_found) { 2486 array[first_index] += (PetscScalar)nlocals; 2487 old_index = first_index; 2488 for (i=first_index+1;i<s;i++) { 2489 if (PetscRealPart(array[i]) > 0.0) { 2490 array[i] += array[old_index]; 2491 old_index = i; 2492 } 2493 } 2494 } 2495 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2496 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2497 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2498 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2499 /* get global ordering of local dofs */ 2500 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2501 if (local_dofs_mult) { 2502 for (i=0;i<n_local_dofs;i++) { 2503 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2504 } 2505 } else { 2506 for (i=0;i<n_local_dofs;i++) { 2507 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2508 } 2509 } 2510 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2511 /* free workspace */ 2512 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2513 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2514 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2515 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2516 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2517 /* return pointer to global ordering of local dofs */ 2518 *global_numbering_subset = temp_global_dofs; 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2524 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2525 { 2526 PetscInt i,j; 2527 PetscScalar *alphas; 2528 PetscErrorCode ierr; 2529 2530 PetscFunctionBegin; 2531 /* this implements stabilized Gram-Schmidt */ 2532 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2533 for (i=0;i<n;i++) { 2534 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2535 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2536 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2537 } 2538 ierr = PetscFree(alphas);CHKERRQ(ierr); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 /* TODO 2543 - now preallocation is done assuming SEQDENSE local matrices 2544 */ 2545 #undef __FUNCT__ 2546 #define __FUNCT__ "MatISGetMPIXAIJ" 2547 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2548 { 2549 Mat new_mat; 2550 Mat_IS *matis = (Mat_IS*)(mat->data); 2551 /* info on mat */ 2552 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2553 PetscInt bs,rows,cols; 2554 PetscInt lrows,lcols; 2555 PetscInt local_rows,local_cols; 2556 PetscBool isdense; 2557 /* values insertion */ 2558 PetscScalar *array; 2559 PetscInt *local_indices,*global_indices; 2560 /* work */ 2561 PetscInt i,j,index_row; 2562 PetscErrorCode ierr; 2563 2564 PetscFunctionBegin; 2565 /* MISSING CHECKS 2566 - rectangular case not covered (it is not allowed by MATIS) 2567 */ 2568 /* get info from mat */ 2569 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2570 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2571 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2572 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2573 2574 /* work */ 2575 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2576 for (i=0;i<local_rows;i++) local_indices[i]=i; 2577 /* map indices of local mat to global values */ 2578 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2579 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2580 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2581 2582 if (reuse==MAT_INITIAL_MATRIX) { 2583 Vec vec_dnz,vec_onz; 2584 PetscScalar *my_dnz,*my_onz; 2585 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2586 PetscInt index_col,owner; 2587 PetscMPIInt nsubdomains; 2588 2589 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2590 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2591 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2592 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2593 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2594 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2595 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2596 2597 /* 2598 preallocation 2599 */ 2600 2601 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2602 /* 2603 Some vectors are needed to sum up properly on shared interface dofs. 2604 Preallocation macros cannot do the job. 2605 Note that preallocation is not exact, since it overestimates nonzeros 2606 */ 2607 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2608 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2609 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2610 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2611 /* All processes need to compute entire row ownership */ 2612 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2613 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2614 for (i=0;i<nsubdomains;i++) { 2615 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2616 row_ownership[j]=i; 2617 } 2618 } 2619 2620 /* 2621 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2622 then, they will be summed up properly. This way, preallocation is always sufficient 2623 */ 2624 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2625 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2626 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2627 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2628 for (i=0;i<local_rows;i++) { 2629 index_row = global_indices[i]; 2630 for (j=i;j<local_rows;j++) { 2631 owner = row_ownership[index_row]; 2632 index_col = global_indices[j]; 2633 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2634 my_dnz[i] += 1.0; 2635 } else { /* offdiag block */ 2636 my_onz[i] += 1.0; 2637 } 2638 /* same as before, interchanging rows and cols */ 2639 if (i != j) { 2640 owner = row_ownership[index_col]; 2641 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2642 my_dnz[j] += 1.0; 2643 } else { 2644 my_onz[j] += 1.0; 2645 } 2646 } 2647 } 2648 } 2649 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2650 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2651 if (local_rows) { /* multilevel guard */ 2652 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2653 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2654 } 2655 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2656 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2657 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2658 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2659 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2660 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2661 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2662 2663 /* set computed preallocation in dnz and onz */ 2664 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2665 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2666 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2667 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2668 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2669 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2670 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2671 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2672 2673 /* Resize preallocation if overestimated */ 2674 for (i=0;i<lrows;i++) { 2675 dnz[i] = PetscMin(dnz[i],lcols); 2676 onz[i] = PetscMin(onz[i],cols-lcols); 2677 } 2678 /* set preallocation */ 2679 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2680 for (i=0;i<lrows/bs;i++) { 2681 dnz[i] = dnz[i*bs]/bs; 2682 onz[i] = onz[i*bs]/bs; 2683 } 2684 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2685 for (i=0;i<lrows/bs;i++) { 2686 dnz[i] = dnz[i]-i; 2687 } 2688 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2689 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2690 *M = new_mat; 2691 } else { 2692 PetscInt mbs,mrows,mcols; 2693 /* some checks */ 2694 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2695 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2696 if (mrows != rows) { 2697 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2698 } 2699 if (mrows != rows) { 2700 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2701 } 2702 if (mbs != bs) { 2703 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2704 } 2705 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2706 } 2707 /* set local to global mappings */ 2708 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2709 /* Set values */ 2710 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2711 if (isdense) { /* special case for dense local matrices */ 2712 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2713 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2714 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2715 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2716 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2717 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2718 } else { /* very basic values insertion for all other matrix types */ 2719 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2720 for (i=0;i<local_rows;i++) { 2721 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2722 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2723 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2724 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2725 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2726 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2727 } 2728 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2729 } 2730 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2731 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2732 if (isdense) { 2733 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2734 } 2735 PetscFunctionReturn(0); 2736 } 2737 2738 #undef __FUNCT__ 2739 #define __FUNCT__ "MatISGetSubassemblingPattern" 2740 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2741 { 2742 Mat subdomain_adj; 2743 IS new_ranks,ranks_send_to; 2744 MatPartitioning partitioner; 2745 Mat_IS *matis; 2746 PetscInt n_neighs,*neighs,*n_shared,**shared; 2747 PetscInt prank; 2748 PetscMPIInt size,rank,color; 2749 PetscInt *xadj,*adjncy,*oldranks; 2750 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2751 PetscInt i,j,n_subdomains,local_size,threshold=0; 2752 PetscErrorCode ierr; 2753 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2754 PetscSubcomm subcomm; 2755 2756 PetscFunctionBegin; 2757 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2758 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2759 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2760 2761 /* Get info on mapping */ 2762 matis = (Mat_IS*)(mat->data); 2763 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2764 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2765 2766 /* build local CSR graph of subdomains' connectivity */ 2767 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2768 xadj[0] = 0; 2769 xadj[1] = PetscMax(n_neighs-1,0); 2770 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2771 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2772 2773 if (threshold) { 2774 PetscInt* count,min_threshold; 2775 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2776 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2777 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2778 for (j=0;j<n_shared[i];j++) { 2779 count[shared[i][j]] += 1; 2780 } 2781 } 2782 /* adapt threshold since we dont want to lose significant connections */ 2783 min_threshold = n_neighs; 2784 for (i=1;i<n_neighs;i++) { 2785 for (j=0;j<n_shared[i];j++) { 2786 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2787 } 2788 } 2789 threshold = PetscMax(min_threshold+1,threshold); 2790 xadj[1] = 0; 2791 for (i=1;i<n_neighs;i++) { 2792 for (j=0;j<n_shared[i];j++) { 2793 if (count[shared[i][j]] < threshold) { 2794 adjncy[xadj[1]] = neighs[i]; 2795 adjncy_wgt[xadj[1]] = n_shared[i]; 2796 xadj[1]++; 2797 break; 2798 } 2799 } 2800 } 2801 ierr = PetscFree(count);CHKERRQ(ierr); 2802 } else { 2803 if (xadj[1]) { 2804 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2805 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2806 } 2807 } 2808 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2809 if (use_square) { 2810 for (i=0;i<xadj[1];i++) { 2811 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2812 } 2813 } 2814 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2815 2816 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2817 2818 /* 2819 Restrict work on active processes only. 2820 */ 2821 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2822 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2823 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2824 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2825 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2826 if (color) { 2827 ierr = PetscFree(xadj);CHKERRQ(ierr); 2828 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2829 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2830 } else { 2831 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2832 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2833 prank = rank; 2834 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2835 /* 2836 for (i=0;i<size;i++) { 2837 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2838 } 2839 */ 2840 for (i=0;i<xadj[1];i++) { 2841 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2842 } 2843 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2844 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2845 n_subdomains = (PetscInt)size; 2846 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2847 2848 /* Partition */ 2849 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2850 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2851 if (use_vwgt) { 2852 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2853 v_wgt[0] = local_size; 2854 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2855 } 2856 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); */ 2857 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2858 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2859 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2860 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2861 2862 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2863 /* ranks_send_to_idx[0] = oldranks[is_indices[0]]; */ /* contiguos set of processes */ 2864 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2865 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2866 /* clean up */ 2867 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2868 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2869 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2870 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2871 } 2872 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2873 2874 /* assemble parallel IS for sends */ 2875 i = 1; 2876 if (color) i=0; 2877 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2878 2879 /* get back IS */ 2880 *is_sends = ranks_send_to; 2881 PetscFunctionReturn(0); 2882 } 2883 2884 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2885 2886 #undef __FUNCT__ 2887 #define __FUNCT__ "MatISSubassemble" 2888 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, MatReuse reuse, Mat *mat_n) 2889 { 2890 Mat local_mat; 2891 Mat_IS *matis; 2892 IS is_sends_internal; 2893 PetscInt rows,cols; 2894 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2895 PetscBool ismatis,isdense; 2896 ISLocalToGlobalMapping l2gmap; 2897 PetscInt* l2gmap_indices; 2898 const PetscInt* is_indices; 2899 MatType new_local_type; 2900 MatTypePrivate new_local_type_private; 2901 /* buffers */ 2902 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2903 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2904 /* MPI */ 2905 MPI_Comm comm; 2906 PetscMPIInt n_sends,n_recvs,commsize; 2907 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2908 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2909 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2910 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2911 PetscErrorCode ierr; 2912 2913 PetscFunctionBegin; 2914 /* checks */ 2915 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2916 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2917 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2918 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2919 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2920 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2921 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2922 if (reuse == MAT_REUSE_MATRIX) { 2923 PetscInt mrows,mcols,mnrows,mncols; 2924 PetscBool ismatis; 2925 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2926 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2927 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2928 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2929 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2930 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2931 } 2932 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2933 PetscValidLogicalCollectiveInt(mat,bs,0); 2934 /* prepare IS for sending if not provided */ 2935 if (!is_sends) { 2936 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2937 ierr = MatISGetSubassemblingPattern(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2938 } else { 2939 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2940 is_sends_internal = is_sends; 2941 } 2942 2943 /* get pointer of MATIS data */ 2944 matis = (Mat_IS*)mat->data; 2945 2946 /* get comm */ 2947 comm = PetscObjectComm((PetscObject)mat); 2948 2949 /* compute number of sends */ 2950 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2951 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2952 2953 /* compute number of receives */ 2954 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2955 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2956 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2957 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2958 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2959 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2960 ierr = PetscFree(iflags);CHKERRQ(ierr); 2961 2962 /* prepare send/receive buffers */ 2963 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2964 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2965 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2966 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2967 2968 /* Get data from local mat */ 2969 if (!isdense) { 2970 /* TODO: See below some guidelines on how to prepare the local buffers */ 2971 /* 2972 send_buffer_vals should contain the raw values of the local matrix 2973 send_buffer_idxs should contain: 2974 - MatType_PRIVATE type 2975 - PetscInt size_of_l2gmap 2976 - PetscInt global_row_indices[size_of_l2gmap] 2977 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2978 */ 2979 } else { 2980 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2981 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2982 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2983 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2984 send_buffer_idxs[1] = i; 2985 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2986 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2987 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2988 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2989 for (i=0;i<n_sends;i++) { 2990 ilengths_vals[is_indices[i]] = len*len; 2991 ilengths_idxs[is_indices[i]] = len+2; 2992 } 2993 } 2994 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2995 buf_size_idxs = 0; 2996 buf_size_vals = 0; 2997 for (i=0;i<n_recvs;i++) { 2998 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2999 buf_size_vals += (PetscInt)olengths_vals[i]; 3000 } 3001 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 3002 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 3003 3004 /* get new tags for clean communications */ 3005 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3006 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3007 3008 /* allocate for requests */ 3009 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 3010 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 3011 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 3012 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 3013 3014 /* communications */ 3015 ptr_idxs = recv_buffer_idxs; 3016 ptr_vals = recv_buffer_vals; 3017 for (i=0;i<n_recvs;i++) { 3018 source_dest = onodes[i]; 3019 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3020 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3021 ptr_idxs += olengths_idxs[i]; 3022 ptr_vals += olengths_vals[i]; 3023 } 3024 for (i=0;i<n_sends;i++) { 3025 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3026 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3027 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3028 } 3029 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3030 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3031 3032 /* assemble new l2g map */ 3033 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3034 ptr_idxs = recv_buffer_idxs; 3035 buf_size_idxs = 0; 3036 for (i=0;i<n_recvs;i++) { 3037 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3038 ptr_idxs += olengths_idxs[i]; 3039 } 3040 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 3041 ptr_idxs = recv_buffer_idxs; 3042 buf_size_idxs = 0; 3043 for (i=0;i<n_recvs;i++) { 3044 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3045 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3046 ptr_idxs += olengths_idxs[i]; 3047 } 3048 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3049 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3050 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3051 3052 /* infer new local matrix type from received local matrices type */ 3053 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3054 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3055 new_local_type_private = MATAIJ_PRIVATE; 3056 new_local_type = MATSEQAIJ; 3057 if (n_recvs) { 3058 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3059 ptr_idxs = recv_buffer_idxs; 3060 for (i=0;i<n_recvs;i++) { 3061 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3062 new_local_type_private = MATAIJ_PRIVATE; 3063 break; 3064 } 3065 ptr_idxs += olengths_idxs[i]; 3066 } 3067 switch (new_local_type_private) { 3068 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 3069 new_local_type = MATSEQAIJ; 3070 bs = 1; 3071 break; 3072 case MATAIJ_PRIVATE: 3073 new_local_type = MATSEQAIJ; 3074 bs = 1; 3075 break; 3076 case MATBAIJ_PRIVATE: 3077 new_local_type = MATSEQBAIJ; 3078 break; 3079 case MATSBAIJ_PRIVATE: 3080 new_local_type = MATSEQSBAIJ; 3081 break; 3082 default: 3083 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3084 break; 3085 } 3086 } 3087 3088 /* create MATIS object if needed */ 3089 if (reuse == MAT_INITIAL_MATRIX) { 3090 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3091 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3092 } else { 3093 /* it also destroys the local matrices */ 3094 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3095 } 3096 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3097 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3098 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3099 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3100 3101 /* set values */ 3102 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3103 ptr_vals = recv_buffer_vals; 3104 ptr_idxs = recv_buffer_idxs; 3105 for (i=0;i<n_recvs;i++) { 3106 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3107 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3108 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3109 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3110 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3111 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3112 } 3113 ptr_idxs += olengths_idxs[i]; 3114 ptr_vals += olengths_vals[i]; 3115 } 3116 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3117 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3118 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3119 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3120 3121 { /* check */ 3122 Vec lvec,rvec; 3123 PetscReal infty_error; 3124 3125 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3126 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3127 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3128 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3129 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3130 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3131 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3132 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3133 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3134 } 3135 3136 /* free workspace */ 3137 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3138 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3139 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3140 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3141 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3142 if (isdense) { 3143 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3144 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3145 } else { 3146 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3147 } 3148 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3149 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3150 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3151 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3152 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3153 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3154 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3155 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3156 ierr = PetscFree(onodes);CHKERRQ(ierr); 3157 PetscFunctionReturn(0); 3158 } 3159 3160 #undef __FUNCT__ 3161 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3162 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3163 { 3164 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3165 PC_IS *pcis = (PC_IS*)pc->data; 3166 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3167 MatNullSpace CoarseNullSpace=NULL; 3168 ISLocalToGlobalMapping coarse_islg; 3169 IS coarse_is; 3170 PetscInt max_it; 3171 PetscInt im_active=-1,active_procs=-1; 3172 PC pc_temp; 3173 PCType coarse_pc_type; 3174 KSPType coarse_ksp_type; 3175 PetscBool multilevel_requested,multilevel_allowed; 3176 PetscBool setsym,issym,isherm,ispreonly,isbddc,isnn,coarse_reuse; 3177 MatStructure matstruct; 3178 PetscErrorCode ierr; 3179 3180 PetscFunctionBegin; 3181 /* Assign global numbering to coarse dofs */ 3182 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3183 PetscInt ocoarse_size; 3184 ocoarse_size = pcbddc->coarse_size; 3185 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3186 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3187 /* see if we can avoid some work */ 3188 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3189 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3190 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3191 coarse_reuse = PETSC_FALSE; 3192 } else { /* we can safely reuse already computed coarse matrix */ 3193 coarse_reuse = PETSC_TRUE; 3194 } 3195 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3196 coarse_reuse = PETSC_FALSE; 3197 } 3198 /* reset any subassembling information */ 3199 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3200 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3201 coarse_reuse = PETSC_TRUE; 3202 } 3203 3204 /* test if we can go multilevel */ 3205 multilevel_allowed = PETSC_FALSE; 3206 multilevel_requested = PETSC_FALSE; 3207 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3208 if (multilevel_requested) { 3209 /* count "active processes" */ 3210 im_active = !!(pcis->n); 3211 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3212 if (active_procs/pcbddc->coarsening_ratio < 2) { 3213 multilevel_allowed = PETSC_FALSE; 3214 } else { 3215 multilevel_allowed = PETSC_TRUE; 3216 } 3217 } 3218 3219 /* creates temporary l2gmap and IS for coarse indexes */ 3220 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3221 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3222 3223 /* creates temporary MATIS object for coarse matrix */ 3224 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3225 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3226 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3227 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3228 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3229 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3230 3231 /* set defaults for coarse KSP and PC */ 3232 if (multilevel_allowed) { 3233 coarse_ksp_type = KSPRICHARDSON; 3234 coarse_pc_type = PCBDDC; 3235 } else { 3236 coarse_ksp_type = KSPPREONLY; 3237 coarse_pc_type = PCREDUNDANT; 3238 } 3239 3240 /* create the coarse KSP object only once with defaults */ 3241 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3242 if (!pcbddc->coarse_ksp) { 3243 char prefix[256],str_level[16]; 3244 size_t len; 3245 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3246 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3247 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3248 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is,matstruct);CHKERRQ(ierr); 3249 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3250 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3251 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3252 /* prefix */ 3253 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3254 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3255 if (!pcbddc->current_level) { 3256 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3257 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3258 } else { 3259 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3260 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3261 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3262 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3263 *(prefix+len)='\0'; 3264 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3265 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3266 } 3267 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3268 /* allow user customization */ 3269 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3270 } 3271 3272 /* get some info after set from options */ 3273 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3274 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3275 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3276 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3277 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3278 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3279 isbddc = PETSC_FALSE; 3280 } 3281 3282 /* propagate BDDC info to the next level */ 3283 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3284 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3285 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3286 if (isbddc) { /* protects from unneded computations */ 3287 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3288 const PetscInt *idxs; 3289 IS *c_isfordofs,c_isneumann; 3290 ISLocalToGlobalMapping tmap; 3291 3292 /* create map between primal indices (in local representative ordering) and local subdomain numbering */ 3293 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3294 /* allocate space for temporary storage */ 3295 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3296 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3297 /* dofs splitting */ 3298 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&c_isfordofs);CHKERRQ(ierr); 3299 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3300 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3301 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3302 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3303 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3304 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3305 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3306 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&c_isfordofs[i]);CHKERRQ(ierr); 3307 /* ierr = ISView(c_isfordofs[i],0);CHKERRQ(ierr); */ 3308 } 3309 ierr = PCBDDCSetDofsSplitting(pc_temp,pcbddc->n_ISForDofsLocal,c_isfordofs);CHKERRQ(ierr); 3310 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3311 ierr = ISDestroy(&c_isfordofs[i]);CHKERRQ(ierr); 3312 } 3313 ierr = PetscFree(c_isfordofs);CHKERRQ(ierr); 3314 /* neumann boundaries */ 3315 if (pcbddc->NeumannBoundariesLocal) { 3316 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3317 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3318 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3319 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3320 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3321 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3322 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&c_isneumann);CHKERRQ(ierr); 3323 /* ierr = ISView(c_isneumann,0);CHKERRQ(ierr); */ 3324 ierr = PCBDDCSetNeumannBoundaries(pc_temp,c_isneumann);CHKERRQ(ierr); 3325 } 3326 ierr = ISDestroy(&c_isneumann);CHKERRQ(ierr); 3327 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3328 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3329 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3330 } 3331 3332 /* destroy no longer needed map */ 3333 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3334 3335 /* assemble coarse matrix */ 3336 if (isbddc || isnn) { 3337 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3338 ierr = MatISGetSubassemblingPattern(coarse_mat_is,pcbddc->coarsening_ratio,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3339 if (pcbddc->dbg_flag) { 3340 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3341 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3342 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 3343 } 3344 } 3345 if (coarse_reuse) { 3346 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3347 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3348 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3349 } else { 3350 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3351 } 3352 } else { 3353 if (coarse_reuse) { 3354 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3355 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3356 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3357 } else { 3358 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3359 } 3360 } 3361 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3362 3363 /* create local to global scatters for coarse problem */ 3364 if (pcbddc->new_primal_space) { 3365 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3366 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3367 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3368 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3369 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3370 } 3371 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3372 3373 /* propagate symmetry info to coarse matrix */ 3374 issym = PETSC_FALSE; 3375 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3376 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3377 isherm = PETSC_FALSE; 3378 ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr); 3379 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3380 3381 /* Compute coarse null space (special handling by BDDC only) */ 3382 if (pcbddc->NullSpace) { 3383 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3384 if (isbddc) { 3385 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3386 } else { 3387 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3388 } 3389 } 3390 3391 /* set operators */ 3392 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3393 3394 /* additional KSP customization */ 3395 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3396 if (max_it < 5) { 3397 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3398 } 3399 3400 /* print some info if requested */ 3401 if (pcbddc->dbg_flag) { 3402 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3403 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3404 if (!multilevel_allowed) { 3405 if (multilevel_requested) { 3406 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3407 } else if (pcbddc->max_levels) { 3408 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3409 } 3410 } 3411 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3412 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3413 } 3414 3415 /* setup coarse ksp */ 3416 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3417 3418 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3419 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3420 if (pcbddc->dbg_flag || !ispreonly) { 3421 KSP check_ksp; 3422 KSPType check_ksp_type; 3423 PC check_pc; 3424 Vec check_vec; 3425 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3426 PetscInt its; 3427 PetscBool compute; 3428 3429 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3430 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3431 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3432 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3433 if (ispreonly) { 3434 check_ksp_type = KSPPREONLY; 3435 compute = PETSC_FALSE; 3436 } else { 3437 check_ksp_type = KSPGMRES; 3438 compute = PETSC_TRUE; 3439 } 3440 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3441 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3442 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3443 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3444 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3445 /* create random vec */ 3446 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3447 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3448 if (CoarseNullSpace) { 3449 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3450 } 3451 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3452 /* solve coarse problem */ 3453 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3454 if (CoarseNullSpace) { 3455 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3456 } 3457 /* set eigenvalue estimation if preonly has not been requested */ 3458 if (!ispreonly) { 3459 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3460 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Coarse problem eigenvalues: %1.6e %1.6e\n",lambda_min,lambda_max);CHKERRQ(ierr); */ 3461 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3462 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3463 } 3464 /* check coarse problem residual error */ 3465 if (pcbddc->dbg_flag) { 3466 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3467 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3468 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3469 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3470 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3471 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3472 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3473 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3474 if (!ispreonly) { 3475 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3476 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3477 } 3478 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3479 } 3480 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3481 } 3482 3483 /* print additional info */ 3484 if (pcbddc->dbg_flag) { 3485 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3487 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3488 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3489 } 3490 3491 /* free memory */ 3492 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3493 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3494 PetscFunctionReturn(0); 3495 } 3496 3497 #undef __FUNCT__ 3498 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3499 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3500 { 3501 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3502 PC_IS* pcis = (PC_IS*)pc->data; 3503 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3504 PetscInt i,coarse_size; 3505 PetscInt *local_primal_indices; 3506 PetscErrorCode ierr; 3507 3508 PetscFunctionBegin; 3509 /* Compute global number of coarse dofs */ 3510 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3511 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3512 } 3513 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3514 3515 /* check numbering */ 3516 if (pcbddc->dbg_flag) { 3517 PetscScalar coarsesum,*array; 3518 3519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3520 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3521 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3522 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3523 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3524 for (i=0;i<pcbddc->local_primal_size;i++) { 3525 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3526 } 3527 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3528 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3529 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3530 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3531 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3532 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3533 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3534 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3535 for (i=0;i<pcis->n;i++) { 3536 if (array[i] == 1.0) { 3537 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3538 } 3539 } 3540 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3541 for (i=0;i<pcis->n;i++) { 3542 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3543 } 3544 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3545 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3546 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3547 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3548 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3550 if (pcbddc->dbg_flag > 1) { 3551 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3552 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3553 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3554 for (i=0;i<pcbddc->local_primal_size;i++) { 3555 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3556 } 3557 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3558 } 3559 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3560 } 3561 /* get back data */ 3562 *coarse_size_n = coarse_size; 3563 *local_primal_indices_n = local_primal_indices; 3564 PetscFunctionReturn(0); 3565 } 3566 3567 #undef __FUNCT__ 3568 #define __FUNCT__ "PCBDDCGlobalToLocal" 3569 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3570 { 3571 PC_IS* pcis = (PC_IS*)pc->data; 3572 IS localis_t; 3573 PetscInt i,lsize,*idxs; 3574 PetscScalar *vals; 3575 PetscErrorCode ierr; 3576 3577 PetscFunctionBegin; 3578 /* get dirichlet indices in local ordering exploiting local to global map */ 3579 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3580 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3581 for (i=0;i<lsize;i++) vals[i] = 1.0; 3582 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3583 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3584 if (idxs) { /* multilevel guard */ 3585 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3586 } 3587 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3588 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3589 ierr = PetscFree(vals);CHKERRQ(ierr); 3590 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3591 /* now compute dirichlet set in local ordering */ 3592 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3593 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3594 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3595 for (i=0,lsize=0;i<pcis->n;i++) { 3596 if (vals[i] == 1.0) { 3597 lsize++; 3598 } 3599 } 3600 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3601 for (i=0,lsize=0;i<pcis->n;i++) { 3602 if (vals[i] == 1.0) { 3603 idxs[lsize++] = i; 3604 } 3605 } 3606 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3607 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3608 *localis = localis_t; 3609 PetscFunctionReturn(0); 3610 } 3611