1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 104 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 111 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 118 { 119 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 120 PC_IS *pcis = (PC_IS*)pc->data; 121 VecType impVecType; 122 PetscInt n_constraints,n_R,old_size; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 if (!pcbddc->ConstraintMatrix) { 127 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 128 } 129 /* get sizes */ 130 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 131 n_R = pcis->n-pcbddc->n_actual_vertices; 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 /* local work vectors (try to avoid unneeded work)*/ 134 /* R nodes */ 135 old_size = -1; 136 if (pcbddc->vec1_R) { 137 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 138 } 139 if (n_R != old_size) { 140 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 141 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 145 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 146 } 147 /* local primal dofs */ 148 old_size = -1; 149 if (pcbddc->vec1_P) { 150 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 151 } 152 if (pcbddc->local_primal_size != old_size) { 153 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 156 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 157 } 158 /* local explicit constraints */ 159 old_size = -1; 160 if (pcbddc->vec1_C) { 161 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 162 } 163 if (n_constraints && n_constraints != old_size) { 164 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 167 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCBDDCSetUpCorrection" 174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 175 { 176 PetscErrorCode ierr; 177 /* pointers to pcis and pcbddc */ 178 PC_IS* pcis = (PC_IS*)pc->data; 179 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 180 /* submatrices of local problem */ 181 Mat A_RV,A_VR,A_VV; 182 /* working matrices */ 183 Mat M1,M2,M3,C_CR; 184 /* working vectors */ 185 Vec vec1_C,vec2_C,vec1_V,vec2_V; 186 /* additional working stuff */ 187 IS is_aux; 188 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 189 const PetscScalar *array,*row_cmat_values; 190 const PetscInt *row_cmat_indices,*idx_R_local; 191 PetscInt *idx_V_B,*auxindices; 192 PetscInt n_vertices,n_constraints,size_of_constraint; 193 PetscInt i,j,n_R,n_D,n_B; 194 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 195 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 196 MatType impMatType; 197 /* some shortcuts to scalars */ 198 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 199 /* for debugging purposes */ 200 PetscReal *coarsefunctions_errors,*constraints_errors; 201 202 PetscFunctionBegin; 203 /* get number of vertices (corners plus constraints with change of basis) 204 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 205 n_vertices = pcbddc->n_actual_vertices; 206 n_constraints = pcbddc->local_primal_size-n_vertices; 207 /* Set Non-overlapping dimensions */ 208 n_B = pcis->n_B; n_D = pcis->n - n_B; 209 n_R = pcis->n-n_vertices; 210 211 /* Set types for local objects needed by BDDC precondtioner */ 212 impMatType = MATSEQDENSE; 213 214 /* Allocating some extra storage just to be safe */ 215 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 216 for (i=0;i<pcis->n;i++) auxindices[i]=i; 217 218 /* vertices in boundary numbering */ 219 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 220 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 221 if (i != n_vertices) { 222 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 223 } 224 225 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 226 if (n_constraints) { 227 /* see if we can save some allocations */ 228 if (pcbddc->local_auxmat2) { 229 PetscInt on_R,on_constraints; 230 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 231 if (on_R != n_R || on_constraints != n_constraints) { 232 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 233 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 234 } 235 } 236 /* work vectors */ 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 239 /* auxiliary matrices */ 240 if (!pcbddc->local_auxmat2) { 241 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 242 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 243 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 244 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 245 } 246 247 /* Extract constraints on R nodes: C_{CR} */ 248 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 249 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 250 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 251 252 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 253 for (i=0;i<n_constraints;i++) { 254 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 255 /* Get row of constraint matrix in R numbering */ 256 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 257 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 258 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 259 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 260 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 261 /* Solve for row of constraint matrix in R numbering */ 262 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 263 /* Set values in local_auxmat2 */ 264 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 265 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 266 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 267 } 268 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 271 272 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 273 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 274 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 275 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 276 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 277 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 278 ierr = MatSetUp(M1);CHKERRQ(ierr); 279 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 280 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 281 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 282 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 283 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 284 ierr = MatDestroy(&M2);CHKERRQ(ierr); 285 ierr = MatDestroy(&M3);CHKERRQ(ierr); 286 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 287 if (!pcbddc->local_auxmat1) { 288 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 289 } else { 290 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 291 } 292 } 293 294 /* Get submatrices from subdomain matrix */ 295 if (n_vertices) { 296 PetscInt ibs,mbs; 297 PetscBool issbaij; 298 Mat newmat; 299 300 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 301 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 302 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 303 if (ibs != mbs) { /* need to convert to SEQAIJ */ 304 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 308 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 309 } else { 310 /* this is safe */ 311 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 313 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 314 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 315 /* which of the two approaches is faster? */ 316 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 317 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 318 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 319 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 320 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 321 } else { 322 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 323 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 324 } 325 } 326 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 327 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 328 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 329 } 330 331 /* Matrix of coarse basis functions (local) */ 332 if (pcbddc->coarse_phi_B) { 333 PetscInt on_B,on_primal; 334 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 335 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 336 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 337 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 338 } 339 } 340 if (pcbddc->coarse_phi_D) { 341 PetscInt on_D,on_primal; 342 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 343 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 344 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 345 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 346 } 347 } 348 if (!pcbddc->coarse_phi_B) { 349 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 350 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 351 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 352 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 353 } 354 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 355 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 356 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 357 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 358 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 359 } 360 361 if (pcbddc->dbg_flag) { 362 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 363 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 364 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 365 } 366 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 367 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 368 369 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 370 371 /* vertices */ 372 for (i=0;i<n_vertices;i++) { 373 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 374 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 375 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 376 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 377 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 378 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 379 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 380 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 383 if (n_constraints) { 384 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 385 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 387 } 388 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 389 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 390 391 /* Set values in coarse basis function and subdomain part of coarse_mat */ 392 /* coarse basis functions */ 393 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 394 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 397 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 400 if (pcbddc->switch_static || pcbddc->dbg_flag) { 401 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 404 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 406 } 407 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 408 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 409 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 if (n_constraints) { 412 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 } 416 417 /* check */ 418 if (pcbddc->dbg_flag) { 419 /* assemble subdomain vector on local nodes */ 420 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 421 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 426 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 427 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 428 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 429 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 430 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 if (n_constraints) { 433 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 } 437 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 440 /* check saddle point solution */ 441 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 444 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 445 /* shift by the identity matrix */ 446 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 447 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 450 } 451 } 452 453 /* constraints */ 454 for (i=0;i<n_constraints;i++) { 455 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 456 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 457 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 458 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 459 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 460 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 461 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 462 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 463 if (n_vertices) { 464 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 465 } 466 /* Set values in coarse basis function and subdomain part of coarse_mat */ 467 /* coarse basis functions */ 468 j = i+n_vertices; /* don't touch this! */ 469 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 470 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 473 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 474 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 476 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 481 } 482 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 483 if (n_vertices) { 484 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 485 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 487 } 488 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 491 492 if (pcbddc->dbg_flag) { 493 /* assemble subdomain vector on nodes */ 494 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 495 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 496 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 498 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 499 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 500 /* assemble subdomain vector of lagrange multipliers */ 501 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 502 if (n_vertices) { 503 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 504 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 505 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 506 } 507 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 508 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 510 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 513 /* check saddle point solution */ 514 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 517 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 518 /* shift by the identity matrix */ 519 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 520 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 523 } 524 } 525 /* call assembling routines for local coarse basis */ 526 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 if (pcbddc->switch_static || pcbddc->dbg_flag) { 529 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 } 532 533 /* compute other basis functions for non-symmetric problems */ 534 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 535 if (!setsym || (setsym && !issym)) { 536 if (!pcbddc->coarse_psi_B) { 537 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 538 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 539 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 540 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 541 } 542 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 543 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 544 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 545 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 546 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 547 } 548 for (i=0;i<pcbddc->local_primal_size;i++) { 549 if (n_constraints) { 550 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 551 for (j=0;j<n_constraints;j++) { 552 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 553 } 554 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 555 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 556 } 557 if (i<n_vertices) { 558 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 559 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 560 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 561 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 562 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 563 if (n_constraints) { 564 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 565 } 566 } else { 567 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 568 } 569 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 570 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 571 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 574 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 575 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 576 if (i<n_vertices) { 577 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 578 } 579 if (pcbddc->switch_static || pcbddc->dbg_flag) { 580 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 583 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 584 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 585 } 586 587 if (pcbddc->dbg_flag) { 588 /* assemble subdomain vector on nodes */ 589 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 590 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 591 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 592 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 593 if (i<n_vertices) { 594 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 595 } 596 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 597 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 598 /* assemble subdomain vector of lagrange multipliers */ 599 for (j=0;j<pcbddc->local_primal_size;j++) { 600 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 601 } 602 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 603 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 604 /* check saddle point solution */ 605 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 608 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 609 /* shift by the identity matrix */ 610 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 611 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 614 } 615 } 616 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 if (pcbddc->switch_static || pcbddc->dbg_flag) { 619 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 } 622 unsymmetric_check = PETSC_TRUE; 623 } else { /* take references to already computed coarse basis */ 624 unsymmetric_check = PETSC_FALSE; 625 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 626 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 627 if (pcbddc->coarse_phi_D) { 628 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 629 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 630 } 631 } 632 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 633 /* Checking coarse_sub_mat and coarse basis functios */ 634 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 635 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 636 if (pcbddc->dbg_flag) { 637 Mat coarse_sub_mat; 638 Mat AUXMAT,TM1,TM2,TM3,TM4; 639 Mat coarse_phi_D,coarse_phi_B; 640 Mat coarse_psi_D,coarse_psi_B; 641 Mat A_II,A_BB,A_IB,A_BI; 642 MatType checkmattype=MATSEQAIJ; 643 PetscReal real_value; 644 645 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 646 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 648 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 649 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 650 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 651 if (unsymmetric_check) { 652 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 653 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 654 } 655 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 656 657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 660 if (unsymmetric_check) { 661 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 662 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 663 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 664 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 665 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 666 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 667 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 668 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 669 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 670 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 671 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 672 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 673 } else { 674 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 675 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 676 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 677 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 678 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 679 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 680 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 681 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 682 } 683 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 684 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 686 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 687 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 688 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 689 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 694 for (i=0;i<pcbddc->local_primal_size;i++) { 695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 696 } 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 698 for (i=0;i<pcbddc->local_primal_size;i++) { 699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 700 } 701 if (unsymmetric_check) { 702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 703 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 705 } 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 707 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 709 } 710 } 711 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 712 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 715 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 716 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 719 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 720 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 721 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 722 if (unsymmetric_check) { 723 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 724 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 725 } 726 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 727 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 728 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 729 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 730 } 731 /* free memory */ 732 if (n_vertices) { 733 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 734 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 735 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 737 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 738 } 739 if (n_constraints) { 740 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 741 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 742 ierr = MatDestroy(&M1);CHKERRQ(ierr); 743 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 744 } 745 ierr = PetscFree(auxindices);CHKERRQ(ierr); 746 /* get back data */ 747 *coarse_submat_vals_n = coarse_submat_vals; 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 753 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 754 { 755 PC_IS* pcis = (PC_IS*)(pc->data); 756 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 757 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 758 PetscBool issbaij,isseqaij; 759 /* manage repeated solves */ 760 MatReuse reuse; 761 MatStructure matstruct; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 766 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 767 } 768 /* get mat flags */ 769 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 770 reuse = MAT_INITIAL_MATRIX; 771 if (pc->setupcalled) { 772 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 773 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 774 if (matstruct == SAME_NONZERO_PATTERN) { 775 reuse = MAT_REUSE_MATRIX; 776 } else { 777 reuse = MAT_INITIAL_MATRIX; 778 } 779 } 780 if (reuse == MAT_INITIAL_MATRIX) { 781 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 783 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 784 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 785 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 786 } 787 788 /* transform local matrices if needed */ 789 if (pcbddc->use_change_of_basis) { 790 Mat change_mat_all; 791 PetscScalar *row_cmat_values; 792 PetscInt *row_cmat_indices; 793 PetscInt *nnz,*is_indices,*temp_indices; 794 PetscInt i,j,k,n_D,n_B; 795 796 /* Get Non-overlapping dimensions */ 797 n_B = pcis->n_B; 798 n_D = pcis->n-n_B; 799 800 /* compute nonzero structure of change of basis on all local nodes */ 801 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 802 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 803 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 804 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 805 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 806 k=1; 807 for (i=0;i<n_B;i++) { 808 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 809 nnz[is_indices[i]]=j; 810 if (k < j) k = j; 811 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 812 } 813 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 814 /* assemble change of basis matrix on the whole set of local dofs */ 815 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 816 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 817 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 818 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 819 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 820 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 821 for (i=0;i<n_D;i++) { 822 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 823 } 824 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 825 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 826 for (i=0;i<n_B;i++) { 827 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 828 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 829 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 830 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 831 } 832 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 833 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 834 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 835 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 836 if (isseqaij) { 837 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 838 } else { 839 Mat work_mat; 840 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 841 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 842 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 843 } 844 /* 845 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 846 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 847 */ 848 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 849 ierr = PetscFree(nnz);CHKERRQ(ierr); 850 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 851 } else { 852 /* without change of basis, the local matrix is unchanged */ 853 if (!pcbddc->local_mat) { 854 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 855 pcbddc->local_mat = matis->A; 856 } 857 } 858 859 /* get submatrices */ 860 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 862 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 863 if (!issbaij) { 864 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 865 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 866 } else { 867 Mat newmat; 868 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 869 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 870 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 871 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 872 } 873 PetscFunctionReturn(0); 874 } 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 878 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 879 { 880 PC_IS* pcis = (PC_IS*)(pc->data); 881 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 882 IS is_aux1,is_aux2; 883 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 884 PetscInt n_vertices,i,j,n_R,n_D,n_B; 885 PetscInt vbs,bs; 886 PetscBT bitmask; 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 /* 891 No need to setup local scatters if 892 - primal space is unchanged 893 AND 894 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 895 AND 896 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 897 */ 898 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 899 PetscFunctionReturn(0); 900 } 901 /* destroy old objects */ 902 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 903 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 904 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 905 /* Set Non-overlapping dimensions */ 906 n_B = pcis->n_B; n_D = pcis->n - n_B; 907 n_vertices = pcbddc->n_actual_vertices; 908 /* create auxiliary bitmask */ 909 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 910 for (i=0;i<n_vertices;i++) { 911 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 912 } 913 914 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 915 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 916 for (i=0, n_R=0; i<pcis->n; i++) { 917 if (!PetscBTLookup(bitmask,i)) { 918 idx_R_local[n_R] = i; 919 n_R++; 920 } 921 } 922 923 /* Block code */ 924 vbs = 1; 925 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 926 if (bs>1 && !(n_vertices%bs)) { 927 PetscBool is_blocked = PETSC_TRUE; 928 PetscInt *vary; 929 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 930 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 931 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 932 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 933 for (i=0; i<n_vertices; i++) { 934 if (vary[i]!=0 && vary[i]!=bs) { 935 is_blocked = PETSC_FALSE; 936 break; 937 } 938 } 939 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 940 vbs = bs; 941 for (i=0;i<n_R/vbs;i++) { 942 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 943 } 944 } 945 ierr = PetscFree(vary);CHKERRQ(ierr); 946 } 947 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 948 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 949 950 /* print some info if requested */ 951 if (pcbddc->dbg_flag) { 952 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 953 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 958 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 960 } 961 962 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 963 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 964 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 965 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 966 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 967 for (i=0; i<n_D; i++) { 968 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 969 } 970 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 971 for (i=0, j=0; i<n_R; i++) { 972 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 973 aux_array1[j++] = i; 974 } 975 } 976 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 977 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 978 for (i=0, j=0; i<n_B; i++) { 979 if (!PetscBTLookup(bitmask,is_indices[i])) { 980 aux_array2[j++] = i; 981 } 982 } 983 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 984 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 985 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 986 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 987 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 988 989 if (pcbddc->switch_static || pcbddc->dbg_flag) { 990 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 991 for (i=0, j=0; i<n_R; i++) { 992 if (PetscBTLookup(bitmask,idx_R_local[i])) { 993 aux_array1[j++] = i; 994 } 995 } 996 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 997 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 998 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 999 } 1000 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1001 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1008 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1009 { 1010 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1011 PC_IS *pcis = (PC_IS*)pc->data; 1012 PC pc_temp; 1013 Mat A_RR; 1014 MatStructure matstruct; 1015 MatReuse reuse; 1016 PetscScalar m_one = -1.0; 1017 PetscReal value; 1018 PetscInt n_D,n_R,ibs,mbs; 1019 PetscBool use_exact,use_exact_reduced,issbaij; 1020 PetscErrorCode ierr; 1021 /* prefixes stuff */ 1022 char dir_prefix[256],neu_prefix[256],str_level[16]; 1023 size_t len; 1024 1025 PetscFunctionBegin; 1026 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1027 1028 /* compute prefixes */ 1029 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1030 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1031 if (!pcbddc->current_level) { 1032 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1033 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1034 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1035 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1036 } else { 1037 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1038 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1039 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1040 len -= 15; /* remove "pc_bddc_coarse_" */ 1041 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1042 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1043 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1044 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1045 *(dir_prefix+len)='\0'; 1046 *(neu_prefix+len)='\0'; 1047 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1048 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1049 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1050 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1051 } 1052 1053 /* DIRICHLET PROBLEM */ 1054 /* Matrix for Dirichlet problem is pcis->A_II */ 1055 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1056 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1057 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1058 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1059 /* default */ 1060 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1061 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1062 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1063 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1064 if (issbaij) { 1065 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1066 } else { 1067 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1068 } 1069 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1070 /* Allow user's customization */ 1071 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1072 } 1073 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1074 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1075 if (!n_D) { 1076 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1077 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1078 } 1079 /* Set Up KSP for Dirichlet problem of BDDC */ 1080 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1081 /* set ksp_D into pcis data */ 1082 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1083 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1084 pcis->ksp_D = pcbddc->ksp_D; 1085 1086 /* NEUMANN PROBLEM */ 1087 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1088 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1089 if (pcbddc->ksp_R) { /* already created ksp */ 1090 PetscInt nn_R; 1091 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1092 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1093 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1094 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1095 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1096 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1097 reuse = MAT_INITIAL_MATRIX; 1098 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1099 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1100 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1101 reuse = MAT_INITIAL_MATRIX; 1102 } else { /* safe to reuse the matrix */ 1103 reuse = MAT_REUSE_MATRIX; 1104 } 1105 } 1106 /* last check */ 1107 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1108 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1109 reuse = MAT_INITIAL_MATRIX; 1110 } 1111 } else { /* first time, so we need to create the matrix */ 1112 reuse = MAT_INITIAL_MATRIX; 1113 } 1114 /* extract A_RR */ 1115 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1116 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1117 if (ibs != mbs) { 1118 Mat newmat; 1119 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1120 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1121 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1122 } else { 1123 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1124 } 1125 if (!pcbddc->ksp_R) { /* create object if not present */ 1126 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1127 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1128 /* default */ 1129 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1130 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1131 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1132 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1133 if (issbaij) { 1134 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1135 } else { 1136 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1137 } 1138 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1139 /* Allow user's customization */ 1140 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1141 } 1142 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1143 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1144 if (!n_R) { 1145 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1146 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1147 } 1148 /* Set Up KSP for Neumann problem of BDDC */ 1149 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1150 1151 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1152 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1153 /* Dirichlet */ 1154 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1155 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1156 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1157 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1158 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1159 /* need to be adapted? */ 1160 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1161 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1162 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1163 /* print info */ 1164 if (pcbddc->dbg_flag) { 1165 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1166 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1168 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1169 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1170 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1171 } 1172 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1173 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1174 } 1175 1176 /* Neumann */ 1177 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1178 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1179 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1180 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1181 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1182 /* need to be adapted? */ 1183 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1184 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1185 /* print info */ 1186 if (pcbddc->dbg_flag) { 1187 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1188 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1189 } 1190 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1191 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1192 } 1193 } 1194 /* free Neumann problem's matrix */ 1195 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1201 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1202 { 1203 PetscErrorCode ierr; 1204 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1205 1206 PetscFunctionBegin; 1207 if (applytranspose) { 1208 if (pcbddc->local_auxmat1) { 1209 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1210 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1211 } 1212 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1213 } else { 1214 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1215 if (pcbddc->local_auxmat1) { 1216 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1217 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1218 } 1219 } 1220 PetscFunctionReturn(0); 1221 } 1222 1223 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1226 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1227 { 1228 PetscErrorCode ierr; 1229 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1230 PC_IS* pcis = (PC_IS*) (pc->data); 1231 const PetscScalar zero = 0.0; 1232 1233 PetscFunctionBegin; 1234 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1235 if (applytranspose) { 1236 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1237 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1238 } else { 1239 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1240 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1241 } 1242 /* start communications from local primal nodes to rhs of coarse solver */ 1243 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1244 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 1247 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1248 /* TODO remove null space when doing multilevel */ 1249 if (pcbddc->coarse_ksp) { 1250 if (applytranspose) { 1251 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1252 } else { 1253 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1254 } 1255 } 1256 /* start communications from coarse solver solution to local primal nodes */ 1257 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 1259 /* Local solution on R nodes */ 1260 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1261 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1262 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1263 if (pcbddc->switch_static) { 1264 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1266 } 1267 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1268 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1269 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1271 if (pcbddc->switch_static) { 1272 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1273 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1274 } 1275 1276 /* complete communications from coarse sol to local primal nodes */ 1277 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1278 1279 /* Sum contributions from two levels */ 1280 if (applytranspose) { 1281 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1282 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1283 } else { 1284 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1285 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1286 } 1287 PetscFunctionReturn(0); 1288 } 1289 1290 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1293 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1294 { 1295 PetscErrorCode ierr; 1296 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1297 PetscScalar *array,*array2; 1298 Vec from,to; 1299 1300 PetscFunctionBegin; 1301 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1302 from = pcbddc->coarse_vec; 1303 to = pcbddc->vec1_P; 1304 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1305 Vec tvec; 1306 PetscInt lsize; 1307 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1308 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1309 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1310 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1311 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1312 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1313 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1314 } 1315 } else { /* from local to global -> put data in coarse right hand side */ 1316 from = pcbddc->vec1_P; 1317 to = pcbddc->coarse_vec; 1318 } 1319 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1325 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1326 { 1327 PetscErrorCode ierr; 1328 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1329 PetscScalar *array,*array2; 1330 Vec from,to; 1331 1332 PetscFunctionBegin; 1333 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1334 from = pcbddc->coarse_vec; 1335 to = pcbddc->vec1_P; 1336 } else { /* from local to global -> put data in coarse right hand side */ 1337 from = pcbddc->vec1_P; 1338 to = pcbddc->coarse_vec; 1339 } 1340 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1341 if (smode == SCATTER_FORWARD) { 1342 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1343 Vec tvec; 1344 PetscInt lsize; 1345 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1346 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1347 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1348 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1349 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1350 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1351 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1352 } 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 /* uncomment for testing purposes */ 1358 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1361 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1362 { 1363 PetscErrorCode ierr; 1364 PC_IS* pcis = (PC_IS*)(pc->data); 1365 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1366 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1367 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1368 MatType impMatType=MATSEQAIJ; 1369 /* one and zero */ 1370 PetscScalar one=1.0,zero=0.0; 1371 /* space to store constraints and their local indices */ 1372 PetscScalar *temp_quadrature_constraint; 1373 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1374 /* iterators */ 1375 PetscInt i,j,k,total_counts,temp_start_ptr; 1376 /* stuff to store connected components stored in pcbddc->mat_graph */ 1377 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1378 PetscInt n_ISForFaces,n_ISForEdges; 1379 /* near null space stuff */ 1380 MatNullSpace nearnullsp; 1381 const Vec *nearnullvecs; 1382 Vec *localnearnullsp; 1383 PetscBool nnsp_has_cnst; 1384 PetscInt nnsp_size; 1385 PetscScalar *array; 1386 /* BLAS integers */ 1387 PetscBLASInt lwork,lierr; 1388 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1389 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1390 /* LAPACK working arrays for SVD or POD */ 1391 PetscBool skip_lapack; 1392 PetscScalar *work; 1393 PetscReal *singular_vals; 1394 #if defined(PETSC_USE_COMPLEX) 1395 PetscReal *rwork; 1396 #endif 1397 #if defined(PETSC_MISSING_LAPACK_GESVD) 1398 PetscBLASInt Blas_one_2=1; 1399 PetscScalar *temp_basis,*correlation_mat; 1400 #else 1401 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1402 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1403 #endif 1404 /* reuse */ 1405 PetscInt olocal_primal_size; 1406 PetscInt *oprimal_indices_local_idxs; 1407 /* change of basis */ 1408 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1409 PetscBool boolforchange,qr_needed; 1410 PetscBT touched,change_basis,qr_needed_idx; 1411 /* auxiliary stuff */ 1412 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1413 /* some quantities */ 1414 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1415 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1416 1417 1418 PetscFunctionBegin; 1419 /* Destroy Mat objects computed previously */ 1420 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1421 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1422 /* Get index sets for faces, edges and vertices from graph */ 1423 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1424 pcbddc->use_vertices = PETSC_TRUE; 1425 } 1426 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1427 /* HACKS (the two following code branches) */ 1428 if (!ISForVertices && pcbddc->NullSpace) { 1429 pcbddc->use_change_of_basis = PETSC_TRUE; 1430 pcbddc->use_change_on_faces = PETSC_FALSE; 1431 } 1432 if (pcbddc->NullSpace) { 1433 /* use_change_of_basis should be consistent among processors */ 1434 PetscBool tbool = pcbddc->use_change_of_basis; 1435 ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1436 } 1437 /* print some info */ 1438 if (pcbddc->dbg_flag) { 1439 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1440 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1441 i = 0; 1442 if (ISForVertices) { 1443 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1444 } 1445 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1446 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1447 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1448 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1449 } 1450 /* check if near null space is attached to global mat */ 1451 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1452 if (nearnullsp) { 1453 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1454 /* remove any stored info */ 1455 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1456 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1457 /* store information for BDDC solver reuse */ 1458 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1459 pcbddc->onearnullspace = nearnullsp; 1460 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1461 for (i=0;i<nnsp_size;i++) { 1462 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1463 } 1464 } else { /* if near null space is not provided BDDC uses constants by default */ 1465 nnsp_size = 0; 1466 nnsp_has_cnst = PETSC_TRUE; 1467 } 1468 /* get max number of constraints on a single cc */ 1469 max_constraints = nnsp_size; 1470 if (nnsp_has_cnst) max_constraints++; 1471 1472 /* 1473 Evaluate maximum storage size needed by the procedure 1474 - temp_indices will contain start index of each constraint stored as follows 1475 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1476 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1477 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1478 */ 1479 total_counts = n_ISForFaces+n_ISForEdges; 1480 total_counts *= max_constraints; 1481 n_vertices = 0; 1482 if (ISForVertices) { 1483 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1484 } 1485 total_counts += n_vertices; 1486 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1487 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1488 total_counts = 0; 1489 max_size_of_constraint = 0; 1490 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1491 if (i<n_ISForEdges) { 1492 used_IS = &ISForEdges[i]; 1493 } else { 1494 used_IS = &ISForFaces[i-n_ISForEdges]; 1495 } 1496 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1497 total_counts += j; 1498 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1499 } 1500 total_counts *= max_constraints; 1501 total_counts += n_vertices; 1502 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1503 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1504 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1505 /* get local part of global near null space vectors */ 1506 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1507 for (k=0;k<nnsp_size;k++) { 1508 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1509 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1510 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 } 1512 1513 /* whether or not to skip lapack calls */ 1514 skip_lapack = PETSC_TRUE; 1515 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1516 1517 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1518 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1519 PetscScalar temp_work; 1520 #if defined(PETSC_MISSING_LAPACK_GESVD) 1521 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1522 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1523 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1524 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1525 #if defined(PETSC_USE_COMPLEX) 1526 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1527 #endif 1528 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1529 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1530 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1531 lwork = -1; 1532 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1533 #if !defined(PETSC_USE_COMPLEX) 1534 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1535 #else 1536 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1537 #endif 1538 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1539 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1540 #else /* on missing GESVD */ 1541 /* SVD */ 1542 PetscInt max_n,min_n; 1543 max_n = max_size_of_constraint; 1544 min_n = max_constraints; 1545 if (max_size_of_constraint < max_constraints) { 1546 min_n = max_size_of_constraint; 1547 max_n = max_constraints; 1548 } 1549 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1550 #if defined(PETSC_USE_COMPLEX) 1551 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1552 #endif 1553 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1554 lwork = -1; 1555 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1556 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1557 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1558 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1559 #if !defined(PETSC_USE_COMPLEX) 1560 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1561 #else 1562 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1563 #endif 1564 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1565 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1566 #endif /* on missing GESVD */ 1567 /* Allocate optimal workspace */ 1568 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1569 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1570 } 1571 /* Now we can loop on constraining sets */ 1572 total_counts = 0; 1573 temp_indices[0] = 0; 1574 /* vertices */ 1575 if (ISForVertices) { 1576 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1577 if (nnsp_has_cnst) { /* consider all vertices */ 1578 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1579 for (i=0;i<n_vertices;i++) { 1580 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1581 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1582 total_counts++; 1583 } 1584 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1585 PetscBool used_vertex; 1586 for (i=0;i<n_vertices;i++) { 1587 used_vertex = PETSC_FALSE; 1588 k = 0; 1589 while (!used_vertex && k<nnsp_size) { 1590 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1591 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1592 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1593 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1594 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1595 total_counts++; 1596 used_vertex = PETSC_TRUE; 1597 } 1598 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1599 k++; 1600 } 1601 } 1602 } 1603 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1604 n_vertices = total_counts; 1605 } 1606 1607 /* edges and faces */ 1608 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1609 if (i<n_ISForEdges) { 1610 used_IS = &ISForEdges[i]; 1611 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1612 } else { 1613 used_IS = &ISForFaces[i-n_ISForEdges]; 1614 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1615 } 1616 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1617 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1618 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1619 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1620 /* change of basis should not be performed on local periodic nodes */ 1621 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1622 if (nnsp_has_cnst) { 1623 PetscScalar quad_value; 1624 temp_constraints++; 1625 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1626 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1627 for (j=0;j<size_of_constraint;j++) { 1628 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1629 } 1630 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1631 total_counts++; 1632 } 1633 for (k=0;k<nnsp_size;k++) { 1634 PetscReal real_value; 1635 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1636 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1637 for (j=0;j<size_of_constraint;j++) { 1638 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1639 } 1640 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1641 /* check if array is null on the connected component */ 1642 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1643 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1644 if (real_value > 0.0) { /* keep indices and values */ 1645 temp_constraints++; 1646 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1647 total_counts++; 1648 } 1649 } 1650 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1651 valid_constraints = temp_constraints; 1652 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1653 if (!pcbddc->use_nnsp_true && temp_constraints) { 1654 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1655 1656 #if defined(PETSC_MISSING_LAPACK_GESVD) 1657 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1658 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1659 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1660 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1661 from that computed using LAPACKgesvd 1662 -> This is due to a different computation of eigenvectors in LAPACKheev 1663 -> The quality of the POD-computed basis will be the same */ 1664 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1665 /* Store upper triangular part of correlation matrix */ 1666 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1667 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1668 for (j=0;j<temp_constraints;j++) { 1669 for (k=0;k<j+1;k++) { 1670 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1671 } 1672 } 1673 /* compute eigenvalues and eigenvectors of correlation matrix */ 1674 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1675 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1676 #if !defined(PETSC_USE_COMPLEX) 1677 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1678 #else 1679 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1680 #endif 1681 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1682 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1683 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1684 j=0; 1685 while (j < temp_constraints && singular_vals[j] < tol) j++; 1686 total_counts=total_counts-j; 1687 valid_constraints = temp_constraints-j; 1688 /* scale and copy POD basis into used quadrature memory */ 1689 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1690 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1691 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1692 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1693 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1694 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1695 if (j<temp_constraints) { 1696 PetscInt ii; 1697 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1698 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1699 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1700 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1701 for (k=0;k<temp_constraints-j;k++) { 1702 for (ii=0;ii<size_of_constraint;ii++) { 1703 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1704 } 1705 } 1706 } 1707 #else /* on missing GESVD */ 1708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1709 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1710 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1711 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1712 #if !defined(PETSC_USE_COMPLEX) 1713 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1714 #else 1715 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1716 #endif 1717 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1718 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1719 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1720 k = temp_constraints; 1721 if (k > size_of_constraint) k = size_of_constraint; 1722 j = 0; 1723 while (j < k && singular_vals[k-j-1] < tol) j++; 1724 total_counts = total_counts-temp_constraints+k-j; 1725 valid_constraints = k-j; 1726 #endif /* on missing GESVD */ 1727 } 1728 /* setting change_of_basis flag is safe now */ 1729 if (boolforchange) { 1730 for (j=0;j<valid_constraints;j++) { 1731 PetscBTSet(change_basis,total_counts-j-1); 1732 } 1733 } 1734 } 1735 /* free index sets of faces, edges and vertices */ 1736 for (i=0;i<n_ISForFaces;i++) { 1737 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1738 } 1739 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1740 for (i=0;i<n_ISForEdges;i++) { 1741 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1742 } 1743 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1744 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1745 /* map temp_indices_to_constraint in boundary numbering */ 1746 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1747 if (i != temp_indices[total_counts]) { 1748 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1749 } 1750 1751 /* free workspace */ 1752 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1753 ierr = PetscFree(work);CHKERRQ(ierr); 1754 #if defined(PETSC_USE_COMPLEX) 1755 ierr = PetscFree(rwork);CHKERRQ(ierr); 1756 #endif 1757 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1758 #if defined(PETSC_MISSING_LAPACK_GESVD) 1759 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1760 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1761 #endif 1762 } 1763 for (k=0;k<nnsp_size;k++) { 1764 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1765 } 1766 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1767 1768 /* set quantities in pcbddc data structure and store previous primal size */ 1769 /* n_vertices defines the number of subdomain corners in the primal space */ 1770 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1771 olocal_primal_size = pcbddc->local_primal_size; 1772 pcbddc->local_primal_size = total_counts; 1773 pcbddc->n_vertices = n_vertices; 1774 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1775 1776 /* Create constraint matrix */ 1777 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1778 /* so we need to set it up properly either with or without change of basis */ 1779 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1780 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1781 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1782 /* array to compute a local numbering of constraints : vertices first then constraints */ 1783 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1784 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1785 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1786 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1787 /* auxiliary stuff for basis change */ 1788 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1789 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1790 1791 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1792 total_primal_vertices=0; 1793 for (i=0;i<pcbddc->local_primal_size;i++) { 1794 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1795 if (size_of_constraint == 1) { 1796 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1797 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1798 aux_primal_minloc[total_primal_vertices]=0; 1799 total_primal_vertices++; 1800 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1801 PetscInt min_loc,min_index; 1802 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1803 /* find first untouched local node */ 1804 k = 0; 1805 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1806 min_index = global_indices[k]; 1807 min_loc = k; 1808 /* search the minimum among global nodes already untouched on the cc */ 1809 for (k=1;k<size_of_constraint;k++) { 1810 /* there can be more than one constraint on a single connected component */ 1811 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1812 min_index = global_indices[k]; 1813 min_loc = k; 1814 } 1815 } 1816 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1817 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1818 aux_primal_minloc[total_primal_vertices]=min_loc; 1819 total_primal_vertices++; 1820 } 1821 } 1822 /* determine if a QR strategy is needed for change of basis */ 1823 qr_needed = PETSC_FALSE; 1824 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1825 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1826 if (PetscBTLookup(change_basis,i)) { 1827 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1828 j = 0; 1829 for (k=0;k<size_of_constraint;k++) { 1830 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1831 j++; 1832 } 1833 } 1834 /* found more than one primal dof on the cc */ 1835 if (j > 1) { 1836 PetscBTSet(qr_needed_idx,i); 1837 qr_needed = PETSC_TRUE; 1838 } 1839 } 1840 } 1841 /* free workspace */ 1842 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1843 1844 /* permute indices in order to have a sorted set of vertices */ 1845 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1846 1847 /* nonzero structure of constraint matrix */ 1848 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1849 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1850 j=total_primal_vertices; 1851 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1852 if (!PetscBTLookup(change_basis,i)) { 1853 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1854 j++; 1855 } 1856 } 1857 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1858 ierr = PetscFree(nnz);CHKERRQ(ierr); 1859 /* set values in constraint matrix */ 1860 for (i=0;i<total_primal_vertices;i++) { 1861 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1862 } 1863 total_counts = total_primal_vertices; 1864 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1865 if (!PetscBTLookup(change_basis,i)) { 1866 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1867 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1868 total_counts++; 1869 } 1870 } 1871 /* assembling */ 1872 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1873 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1874 /* 1875 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1876 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1877 */ 1878 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1879 if (pcbddc->use_change_of_basis) { 1880 /* dual and primal dofs on a single cc */ 1881 PetscInt dual_dofs,primal_dofs; 1882 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1883 PetscInt primal_counter; 1884 /* working stuff for GEQRF */ 1885 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1886 PetscBLASInt lqr_work; 1887 /* working stuff for UNGQR */ 1888 PetscScalar *gqr_work,lgqr_work_t; 1889 PetscBLASInt lgqr_work; 1890 /* working stuff for TRTRS */ 1891 PetscScalar *trs_rhs; 1892 PetscBLASInt Blas_NRHS; 1893 /* pointers for values insertion into change of basis matrix */ 1894 PetscInt *start_rows,*start_cols; 1895 PetscScalar *start_vals; 1896 /* working stuff for values insertion */ 1897 PetscBT is_primal; 1898 1899 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1900 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1901 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1902 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1903 /* work arrays */ 1904 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1905 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1906 /* nonzeros per row */ 1907 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1908 if (PetscBTLookup(change_basis,i)) { 1909 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1910 if (PetscBTLookup(qr_needed_idx,i)) { 1911 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1912 } else { 1913 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1914 /* get local primal index on the cc */ 1915 j = 0; 1916 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1917 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1918 } 1919 } 1920 } 1921 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1922 ierr = PetscFree(nnz);CHKERRQ(ierr); 1923 /* Set initial identity in the matrix */ 1924 for (i=0;i<pcis->n_B;i++) { 1925 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1926 } 1927 1928 if (pcbddc->dbg_flag) { 1929 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1930 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1931 } 1932 1933 1934 /* Now we loop on the constraints which need a change of basis */ 1935 /* 1936 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1937 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1938 1939 Basic blocks of change of basis matrix T computed 1940 1941 - Using the following block transformation if there is only a primal dof on the cc 1942 (in the example, primal dof is the last one of the edge in LOCAL ordering 1943 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1944 | 1 0 ... 0 1 | 1945 | 0 1 ... 0 1 | 1946 | ... | 1947 | 0 ... 1 1 | 1948 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1949 1950 - via QR decomposition of constraints otherwise 1951 */ 1952 if (qr_needed) { 1953 /* space to store Q */ 1954 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1955 /* first we issue queries for optimal work */ 1956 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1957 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1958 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1959 lqr_work = -1; 1960 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1961 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1962 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1963 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1964 lgqr_work = -1; 1965 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1966 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1967 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1968 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1969 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1970 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1971 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1972 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1973 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1974 /* array to store scaling factors for reflectors */ 1975 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1976 /* array to store rhs and solution of triangular solver */ 1977 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1978 /* allocating workspace for check */ 1979 if (pcbddc->dbg_flag) { 1980 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1981 } 1982 } 1983 /* array to store whether a node is primal or not */ 1984 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1985 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1986 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1987 if (i != total_primal_vertices) { 1988 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1989 } 1990 for (i=0;i<total_primal_vertices;i++) { 1991 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1992 } 1993 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1994 1995 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1996 /* -> using implicit ordering contained in temp_indices data */ 1997 total_counts = pcbddc->n_vertices; 1998 primal_counter = total_counts; 1999 while (total_counts<pcbddc->local_primal_size) { 2000 primal_dofs = 1; 2001 if (PetscBTLookup(change_basis,total_counts)) { 2002 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2003 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2004 primal_dofs++; 2005 } 2006 /* get constraint info */ 2007 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2008 dual_dofs = size_of_constraint-primal_dofs; 2009 2010 if (pcbddc->dbg_flag) { 2011 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2012 } 2013 2014 if (primal_dofs > 1) { /* QR */ 2015 2016 /* copy quadrature constraints for change of basis check */ 2017 if (pcbddc->dbg_flag) { 2018 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2019 } 2020 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2021 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2022 2023 /* compute QR decomposition of constraints */ 2024 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2025 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2026 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2027 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2028 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2029 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2030 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2031 2032 /* explictly compute R^-T */ 2033 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2034 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2035 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2036 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2037 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2038 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2039 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2040 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2041 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2042 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2043 2044 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2045 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2046 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2047 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2048 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2049 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2050 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2051 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2052 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2053 2054 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2055 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2056 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2058 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2059 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2061 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2062 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2063 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2064 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2065 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2066 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2067 2068 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2069 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2070 /* insert cols for primal dofs */ 2071 for (j=0;j<primal_dofs;j++) { 2072 start_vals = &qr_basis[j*size_of_constraint]; 2073 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2074 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2075 } 2076 /* insert cols for dual dofs */ 2077 for (j=0,k=0;j<dual_dofs;k++) { 2078 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2079 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2080 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2081 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2082 j++; 2083 } 2084 } 2085 2086 /* check change of basis */ 2087 if (pcbddc->dbg_flag) { 2088 PetscInt ii,jj; 2089 PetscBool valid_qr=PETSC_TRUE; 2090 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2092 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2093 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2094 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2096 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2097 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2098 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2099 for (jj=0;jj<size_of_constraint;jj++) { 2100 for (ii=0;ii<primal_dofs;ii++) { 2101 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2102 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2103 } 2104 } 2105 if (!valid_qr) { 2106 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2107 for (jj=0;jj<size_of_constraint;jj++) { 2108 for (ii=0;ii<primal_dofs;ii++) { 2109 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2110 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2111 } 2112 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2113 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2114 } 2115 } 2116 } 2117 } else { 2118 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2119 } 2120 } 2121 } else { /* simple transformation block */ 2122 PetscInt row,col; 2123 PetscScalar val; 2124 for (j=0;j<size_of_constraint;j++) { 2125 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2126 if (!PetscBTLookup(is_primal,row)) { 2127 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2128 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2129 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2130 } else { 2131 for (k=0;k<size_of_constraint;k++) { 2132 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2133 if (row != col) { 2134 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2135 } else { 2136 val = 1.0; 2137 } 2138 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2139 } 2140 } 2141 } 2142 if (pcbddc->dbg_flag) { 2143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2144 } 2145 } 2146 /* increment primal counter */ 2147 primal_counter += primal_dofs; 2148 } else { 2149 if (pcbddc->dbg_flag) { 2150 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2151 } 2152 } 2153 /* increment constraint counter total_counts */ 2154 total_counts += primal_dofs; 2155 } 2156 2157 /* free workspace */ 2158 if (qr_needed) { 2159 if (pcbddc->dbg_flag) { 2160 ierr = PetscFree(work);CHKERRQ(ierr); 2161 } 2162 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2163 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2164 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2165 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2166 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2167 } 2168 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2169 /* assembling */ 2170 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2171 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2172 /* 2173 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2174 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2175 */ 2176 } 2177 2178 /* get indices in local ordering for vertices and constraints */ 2179 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2180 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2181 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2182 } 2183 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2184 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2185 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2186 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2187 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2188 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2189 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2190 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2191 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2192 /* set quantities in PCBDDC data struct */ 2193 pcbddc->n_actual_vertices = i; 2194 /* check if a new primal space has been introduced */ 2195 pcbddc->new_primal_space_local = PETSC_TRUE; 2196 if (olocal_primal_size == pcbddc->local_primal_size) { 2197 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2198 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2199 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2200 } 2201 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2202 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2203 2204 /* flush dbg viewer */ 2205 if (pcbddc->dbg_flag) { 2206 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2207 } 2208 2209 /* free workspace */ 2210 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2211 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2212 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2213 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2214 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2215 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2216 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2217 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2223 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2224 { 2225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2226 PC_IS *pcis = (PC_IS*)pc->data; 2227 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2228 PetscInt ierr,i,vertex_size; 2229 PetscViewer viewer=pcbddc->dbg_viewer; 2230 2231 PetscFunctionBegin; 2232 /* Reset previously computed graph */ 2233 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2234 /* Init local Graph struct */ 2235 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2236 2237 /* Check validity of the csr graph passed in by the user */ 2238 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2239 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2240 } 2241 2242 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2243 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2244 Mat mat_adj; 2245 const PetscInt *xadj,*adjncy; 2246 PetscBool flg_row=PETSC_TRUE; 2247 2248 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2249 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2250 if (!flg_row) { 2251 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2252 } 2253 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2254 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2255 if (!flg_row) { 2256 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2257 } 2258 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2259 } 2260 2261 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2262 vertex_size = 1; 2263 if (pcbddc->user_provided_isfordofs) { 2264 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2265 ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2266 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2267 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2268 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2269 } 2270 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2271 pcbddc->n_ISForDofs = 0; 2272 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2273 } 2274 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2275 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2276 } else { 2277 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2278 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2279 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2280 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2281 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2282 } 2283 } 2284 } 2285 2286 /* Setup of Graph */ 2287 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2288 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2289 } 2290 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2291 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2292 } 2293 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2294 2295 /* Graph's connected components analysis */ 2296 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2297 2298 /* print some info to stdout */ 2299 if (pcbddc->dbg_flag) { 2300 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2301 } 2302 2303 /* mark topography has done */ 2304 pcbddc->recompute_topography = PETSC_FALSE; 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #undef __FUNCT__ 2309 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2310 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2311 { 2312 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2313 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2314 PetscErrorCode ierr; 2315 2316 PetscFunctionBegin; 2317 n = 0; 2318 vertices = 0; 2319 if (pcbddc->ConstraintMatrix) { 2320 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2321 for (i=0;i<local_primal_size;i++) { 2322 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2323 if (size_of_constraint == 1) n++; 2324 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2325 } 2326 if (vertices_idx) { 2327 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2328 n = 0; 2329 for (i=0;i<local_primal_size;i++) { 2330 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2331 if (size_of_constraint == 1) { 2332 vertices[n++]=row_cmat_indices[0]; 2333 } 2334 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2335 } 2336 } 2337 } 2338 *n_vertices = n; 2339 if (vertices_idx) *vertices_idx = vertices; 2340 PetscFunctionReturn(0); 2341 } 2342 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2345 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2346 { 2347 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2348 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2349 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2350 PetscBT touched; 2351 PetscErrorCode ierr; 2352 2353 /* This function assumes that the number of local constraints per connected component 2354 is not greater than the number of nodes defined for the connected component 2355 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2356 PetscFunctionBegin; 2357 n = 0; 2358 constraints_index = 0; 2359 if (pcbddc->ConstraintMatrix) { 2360 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2361 max_size_of_constraint = 0; 2362 for (i=0;i<local_primal_size;i++) { 2363 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2364 if (size_of_constraint > 1) { 2365 n++; 2366 } 2367 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2368 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2369 } 2370 if (constraints_idx) { 2371 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2372 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2373 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2374 n = 0; 2375 for (i=0;i<local_primal_size;i++) { 2376 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2377 if (size_of_constraint > 1) { 2378 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2379 /* find first untouched local node */ 2380 j = 0; 2381 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2382 min_index = row_cmat_global_indices[j]; 2383 min_loc = j; 2384 /* search the minimum among nodes not yet touched on the connected component 2385 since there can be more than one constraint on a single cc */ 2386 for (j=1;j<size_of_constraint;j++) { 2387 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2388 min_index = row_cmat_global_indices[j]; 2389 min_loc = j; 2390 } 2391 } 2392 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2393 constraints_index[n++] = row_cmat_indices[min_loc]; 2394 } 2395 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2396 } 2397 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2398 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2399 } 2400 } 2401 *n_constraints = n; 2402 if (constraints_idx) *constraints_idx = constraints_index; 2403 PetscFunctionReturn(0); 2404 } 2405 2406 /* the next two functions has been adapted from pcis.c */ 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "PCBDDCApplySchur" 2409 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2410 { 2411 PetscErrorCode ierr; 2412 PC_IS *pcis = (PC_IS*)(pc->data); 2413 2414 PetscFunctionBegin; 2415 if (!vec2_B) { vec2_B = v; } 2416 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2417 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2418 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2419 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2420 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2421 PetscFunctionReturn(0); 2422 } 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2426 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2427 { 2428 PetscErrorCode ierr; 2429 PC_IS *pcis = (PC_IS*)(pc->data); 2430 2431 PetscFunctionBegin; 2432 if (!vec2_B) { vec2_B = v; } 2433 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2434 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2435 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2436 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2437 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 #undef __FUNCT__ 2442 #define __FUNCT__ "PCBDDCSubsetNumbering" 2443 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2444 { 2445 Vec local_vec,global_vec; 2446 IS seqis,paris; 2447 VecScatter scatter_ctx; 2448 PetscScalar *array; 2449 PetscInt *temp_global_dofs; 2450 PetscScalar globalsum; 2451 PetscInt i,j,s; 2452 PetscInt nlocals,first_index,old_index,max_local; 2453 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2454 PetscMPIInt *dof_sizes,*dof_displs; 2455 PetscBool first_found; 2456 PetscErrorCode ierr; 2457 2458 PetscFunctionBegin; 2459 /* mpi buffers */ 2460 MPI_Comm_size(comm,&size_prec_comm); 2461 MPI_Comm_rank(comm,&rank_prec_comm); 2462 j = ( !rank_prec_comm ? size_prec_comm : 0); 2463 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2464 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2465 /* get maximum size of subset */ 2466 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2467 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2468 max_local = 0; 2469 if (n_local_dofs) { 2470 max_local = temp_global_dofs[0]; 2471 for (i=1;i<n_local_dofs;i++) { 2472 if (max_local < temp_global_dofs[i] ) { 2473 max_local = temp_global_dofs[i]; 2474 } 2475 } 2476 } 2477 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2478 max_global++; 2479 max_local = 0; 2480 if (n_local_dofs) { 2481 max_local = local_dofs[0]; 2482 for (i=1;i<n_local_dofs;i++) { 2483 if (max_local < local_dofs[i] ) { 2484 max_local = local_dofs[i]; 2485 } 2486 } 2487 } 2488 max_local++; 2489 /* allocate workspace */ 2490 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2491 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2492 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2493 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2494 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2495 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2496 /* create scatter */ 2497 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2498 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2499 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2500 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2501 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2502 /* init array */ 2503 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2504 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2505 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2506 if (local_dofs_mult) { 2507 for (i=0;i<n_local_dofs;i++) { 2508 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2509 } 2510 } else { 2511 for (i=0;i<n_local_dofs;i++) { 2512 array[local_dofs[i]]=1.0; 2513 } 2514 } 2515 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2516 /* scatter into global vec and get total number of global dofs */ 2517 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2518 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2519 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2520 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2521 /* Fill global_vec with cumulative function for global numbering */ 2522 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2523 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2524 nlocals = 0; 2525 first_index = -1; 2526 first_found = PETSC_FALSE; 2527 for (i=0;i<s;i++) { 2528 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2529 first_found = PETSC_TRUE; 2530 first_index = i; 2531 } 2532 nlocals += (PetscInt)PetscRealPart(array[i]); 2533 } 2534 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2535 if (!rank_prec_comm) { 2536 dof_displs[0]=0; 2537 for (i=1;i<size_prec_comm;i++) { 2538 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2539 } 2540 } 2541 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2542 if (first_found) { 2543 array[first_index] += (PetscScalar)nlocals; 2544 old_index = first_index; 2545 for (i=first_index+1;i<s;i++) { 2546 if (PetscRealPart(array[i]) > 0.0) { 2547 array[i] += array[old_index]; 2548 old_index = i; 2549 } 2550 } 2551 } 2552 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2553 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2554 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2555 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2556 /* get global ordering of local dofs */ 2557 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2558 if (local_dofs_mult) { 2559 for (i=0;i<n_local_dofs;i++) { 2560 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2561 } 2562 } else { 2563 for (i=0;i<n_local_dofs;i++) { 2564 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2565 } 2566 } 2567 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2568 /* free workspace */ 2569 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2570 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2571 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2572 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2573 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2574 /* return pointer to global ordering of local dofs */ 2575 *global_numbering_subset = temp_global_dofs; 2576 PetscFunctionReturn(0); 2577 } 2578 2579 #undef __FUNCT__ 2580 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2581 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2582 { 2583 PetscInt i,j; 2584 PetscScalar *alphas; 2585 PetscErrorCode ierr; 2586 2587 PetscFunctionBegin; 2588 /* this implements stabilized Gram-Schmidt */ 2589 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2590 for (i=0;i<n;i++) { 2591 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2592 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2593 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2594 } 2595 ierr = PetscFree(alphas);CHKERRQ(ierr); 2596 PetscFunctionReturn(0); 2597 } 2598 2599 /* TODO 2600 - now preallocation is done assuming SEQDENSE local matrices 2601 */ 2602 #undef __FUNCT__ 2603 #define __FUNCT__ "MatISGetMPIXAIJ" 2604 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2605 { 2606 Mat new_mat; 2607 Mat_IS *matis = (Mat_IS*)(mat->data); 2608 /* info on mat */ 2609 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2610 PetscInt bs,rows,cols; 2611 PetscInt lrows,lcols; 2612 PetscInt local_rows,local_cols; 2613 PetscBool isdense; 2614 /* values insertion */ 2615 PetscScalar *array; 2616 PetscInt *local_indices,*global_indices; 2617 /* work */ 2618 PetscInt i,j,index_row; 2619 PetscErrorCode ierr; 2620 2621 PetscFunctionBegin; 2622 /* MISSING CHECKS 2623 - rectangular case not covered (it is not allowed by MATIS) 2624 */ 2625 /* get info from mat */ 2626 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2627 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2628 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2629 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2630 2631 /* work */ 2632 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2633 for (i=0;i<local_rows;i++) local_indices[i]=i; 2634 /* map indices of local mat to global values */ 2635 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2636 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2637 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2638 2639 if (reuse==MAT_INITIAL_MATRIX) { 2640 Vec vec_dnz,vec_onz; 2641 PetscScalar *my_dnz,*my_onz; 2642 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2643 PetscInt index_col,owner; 2644 PetscMPIInt nsubdomains; 2645 2646 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2647 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2648 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2649 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2650 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2651 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2652 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2653 2654 /* 2655 preallocation 2656 */ 2657 2658 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2659 /* 2660 Some vectors are needed to sum up properly on shared interface dofs. 2661 Preallocation macros cannot do the job. 2662 Note that preallocation is not exact, since it overestimates nonzeros 2663 */ 2664 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2665 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2666 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2667 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2668 /* All processes need to compute entire row ownership */ 2669 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2670 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2671 for (i=0;i<nsubdomains;i++) { 2672 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2673 row_ownership[j]=i; 2674 } 2675 } 2676 2677 /* 2678 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2679 then, they will be summed up properly. This way, preallocation is always sufficient 2680 */ 2681 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2682 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2683 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2684 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2685 for (i=0;i<local_rows;i++) { 2686 index_row = global_indices[i]; 2687 for (j=i;j<local_rows;j++) { 2688 owner = row_ownership[index_row]; 2689 index_col = global_indices[j]; 2690 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2691 my_dnz[i] += 1.0; 2692 } else { /* offdiag block */ 2693 my_onz[i] += 1.0; 2694 } 2695 /* same as before, interchanging rows and cols */ 2696 if (i != j) { 2697 owner = row_ownership[index_col]; 2698 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2699 my_dnz[j] += 1.0; 2700 } else { 2701 my_onz[j] += 1.0; 2702 } 2703 } 2704 } 2705 } 2706 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2707 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2708 if (local_rows) { /* multilevel guard */ 2709 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2710 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2711 } 2712 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2713 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2714 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2715 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2716 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2717 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2718 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2719 2720 /* set computed preallocation in dnz and onz */ 2721 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2722 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2723 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2724 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2725 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2726 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2727 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2728 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2729 2730 /* Resize preallocation if overestimated */ 2731 for (i=0;i<lrows;i++) { 2732 dnz[i] = PetscMin(dnz[i],lcols); 2733 onz[i] = PetscMin(onz[i],cols-lcols); 2734 } 2735 /* set preallocation */ 2736 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2737 for (i=0;i<lrows/bs;i++) { 2738 dnz[i] = dnz[i*bs]/bs; 2739 onz[i] = onz[i*bs]/bs; 2740 } 2741 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2742 for (i=0;i<lrows/bs;i++) { 2743 dnz[i] = dnz[i]-i; 2744 } 2745 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2746 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2747 *M = new_mat; 2748 } else { 2749 PetscInt mbs,mrows,mcols; 2750 /* some checks */ 2751 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2752 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2753 if (mrows != rows) { 2754 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2755 } 2756 if (mrows != rows) { 2757 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2758 } 2759 if (mbs != bs) { 2760 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2761 } 2762 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2763 } 2764 /* set local to global mappings */ 2765 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2766 /* Set values */ 2767 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2768 if (isdense) { /* special case for dense local matrices */ 2769 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2770 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2771 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2772 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2773 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2774 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2775 } else { /* very basic values insertion for all other matrix types */ 2776 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2777 for (i=0;i<local_rows;i++) { 2778 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2779 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2780 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2781 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2782 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2783 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2784 } 2785 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2786 } 2787 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2788 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2789 if (isdense) { 2790 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2791 } 2792 PetscFunctionReturn(0); 2793 } 2794 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatISGetSubassemblingPattern" 2797 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2798 { 2799 Mat subdomain_adj; 2800 IS new_ranks,ranks_send_to; 2801 MatPartitioning partitioner; 2802 Mat_IS *matis; 2803 PetscInt n_neighs,*neighs,*n_shared,**shared; 2804 PetscInt prank; 2805 PetscMPIInt size,rank,color; 2806 PetscInt *xadj,*adjncy,*oldranks; 2807 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2808 PetscInt i,j,local_size,threshold=0; 2809 PetscErrorCode ierr; 2810 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2811 PetscSubcomm subcomm; 2812 2813 PetscFunctionBegin; 2814 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2815 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2816 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2817 2818 /* Get info on mapping */ 2819 matis = (Mat_IS*)(mat->data); 2820 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2821 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2822 2823 /* build local CSR graph of subdomains' connectivity */ 2824 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2825 xadj[0] = 0; 2826 xadj[1] = PetscMax(n_neighs-1,0); 2827 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2828 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2829 2830 if (threshold) { 2831 PetscInt* count,min_threshold; 2832 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2833 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2834 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2835 for (j=0;j<n_shared[i];j++) { 2836 count[shared[i][j]] += 1; 2837 } 2838 } 2839 /* adapt threshold since we dont want to lose significant connections */ 2840 min_threshold = n_neighs; 2841 for (i=1;i<n_neighs;i++) { 2842 for (j=0;j<n_shared[i];j++) { 2843 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2844 } 2845 } 2846 threshold = PetscMax(min_threshold+1,threshold); 2847 xadj[1] = 0; 2848 for (i=1;i<n_neighs;i++) { 2849 for (j=0;j<n_shared[i];j++) { 2850 if (count[shared[i][j]] < threshold) { 2851 adjncy[xadj[1]] = neighs[i]; 2852 adjncy_wgt[xadj[1]] = n_shared[i]; 2853 xadj[1]++; 2854 break; 2855 } 2856 } 2857 } 2858 ierr = PetscFree(count);CHKERRQ(ierr); 2859 } else { 2860 if (xadj[1]) { 2861 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2862 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2863 } 2864 } 2865 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2866 if (use_square) { 2867 for (i=0;i<xadj[1];i++) { 2868 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2869 } 2870 } 2871 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2872 2873 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2874 2875 /* 2876 Restrict work on active processes only. 2877 */ 2878 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2879 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2880 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2881 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2882 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2883 if (color) { 2884 ierr = PetscFree(xadj);CHKERRQ(ierr); 2885 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2886 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2887 } else { 2888 PetscInt coarsening_ratio; 2889 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2890 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2891 prank = rank; 2892 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2893 /* 2894 for (i=0;i<size;i++) { 2895 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2896 } 2897 */ 2898 for (i=0;i<xadj[1];i++) { 2899 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2900 } 2901 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2902 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2903 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2904 2905 /* Partition */ 2906 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2907 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2908 if (use_vwgt) { 2909 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2910 v_wgt[0] = local_size; 2911 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2912 } 2913 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2914 coarsening_ratio = size/n_subdomains; 2915 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2916 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2917 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2918 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2919 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2920 2921 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2922 if (contiguous) { 2923 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2924 } else { 2925 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2926 } 2927 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2928 /* clean up */ 2929 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2930 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2931 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2932 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2933 } 2934 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2935 2936 /* assemble parallel IS for sends */ 2937 i = 1; 2938 if (color) i=0; 2939 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2940 2941 /* get back IS */ 2942 *is_sends = ranks_send_to; 2943 PetscFunctionReturn(0); 2944 } 2945 2946 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2947 2948 #undef __FUNCT__ 2949 #define __FUNCT__ "MatISSubassemble" 2950 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2951 { 2952 Mat local_mat; 2953 Mat_IS *matis; 2954 IS is_sends_internal; 2955 PetscInt rows,cols; 2956 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2957 PetscBool ismatis,isdense,destroy_mat; 2958 ISLocalToGlobalMapping l2gmap; 2959 PetscInt* l2gmap_indices; 2960 const PetscInt* is_indices; 2961 MatType new_local_type; 2962 /* buffers */ 2963 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2964 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2965 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2966 /* MPI */ 2967 MPI_Comm comm,comm_n; 2968 PetscSubcomm subcomm; 2969 PetscMPIInt n_sends,n_recvs,commsize; 2970 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2971 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2972 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2973 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2974 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2975 PetscErrorCode ierr; 2976 2977 PetscFunctionBegin; 2978 /* TODO: add missing checks */ 2979 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2980 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2981 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2982 PetscValidLogicalCollectiveInt(mat,nis,7); 2983 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2984 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2985 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2986 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2987 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2988 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2989 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2990 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2991 PetscInt mrows,mcols,mnrows,mncols; 2992 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2993 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2994 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2995 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2996 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2997 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2998 } 2999 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3000 PetscValidLogicalCollectiveInt(mat,bs,0); 3001 /* prepare IS for sending if not provided */ 3002 if (!is_sends) { 3003 PetscBool pcontig = PETSC_TRUE; 3004 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3005 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3006 } else { 3007 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3008 is_sends_internal = is_sends; 3009 } 3010 3011 /* get pointer of MATIS data */ 3012 matis = (Mat_IS*)mat->data; 3013 3014 /* get comm */ 3015 comm = PetscObjectComm((PetscObject)mat); 3016 3017 /* compute number of sends */ 3018 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3019 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3020 3021 /* compute number of receives */ 3022 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3023 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 3024 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3025 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3026 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3027 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3028 ierr = PetscFree(iflags);CHKERRQ(ierr); 3029 3030 /* restrict comm if requested */ 3031 subcomm = 0; 3032 destroy_mat = PETSC_FALSE; 3033 if (restrict_comm) { 3034 PetscMPIInt color,rank,subcommsize; 3035 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3036 color = 0; 3037 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3038 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3039 subcommsize = commsize - subcommsize; 3040 /* check if reuse has been requested */ 3041 if (reuse == MAT_REUSE_MATRIX) { 3042 if (*mat_n) { 3043 PetscMPIInt subcommsize2; 3044 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3045 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3046 comm_n = PetscObjectComm((PetscObject)*mat_n); 3047 } else { 3048 comm_n = PETSC_COMM_SELF; 3049 } 3050 } else { /* MAT_INITIAL_MATRIX */ 3051 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3052 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3053 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3054 comm_n = subcomm->comm; 3055 } 3056 /* flag to destroy *mat_n if not significative */ 3057 if (color) destroy_mat = PETSC_TRUE; 3058 } else { 3059 comm_n = comm; 3060 } 3061 3062 /* prepare send/receive buffers */ 3063 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 3064 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3065 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 3066 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3067 if (nis) { 3068 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3069 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3070 } 3071 3072 /* Get data from local matrices */ 3073 if (!isdense) { 3074 /* TODO: See below some guidelines on how to prepare the local buffers */ 3075 /* 3076 send_buffer_vals should contain the raw values of the local matrix 3077 send_buffer_idxs should contain: 3078 - MatType_PRIVATE type 3079 - PetscInt size_of_l2gmap 3080 - PetscInt global_row_indices[size_of_l2gmap] 3081 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3082 */ 3083 } else { 3084 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3085 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3086 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 3087 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3088 send_buffer_idxs[1] = i; 3089 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3090 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3091 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3092 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3093 for (i=0;i<n_sends;i++) { 3094 ilengths_vals[is_indices[i]] = len*len; 3095 ilengths_idxs[is_indices[i]] = len+2; 3096 } 3097 } 3098 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3099 /* additional is (if any) */ 3100 if (nis) { 3101 PetscMPIInt psum; 3102 PetscInt j; 3103 for (j=0,psum=0;j<nis;j++) { 3104 PetscInt plen; 3105 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3106 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3107 psum += len+1; /* indices + lenght */ 3108 } 3109 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3110 for (j=0,psum=0;j<nis;j++) { 3111 PetscInt plen; 3112 const PetscInt *is_array_idxs; 3113 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3114 send_buffer_idxs_is[psum] = plen; 3115 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3116 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3117 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3118 psum += plen+1; /* indices + lenght */ 3119 } 3120 for (i=0;i<n_sends;i++) { 3121 ilengths_idxs_is[is_indices[i]] = psum; 3122 } 3123 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3124 } 3125 3126 buf_size_idxs = 0; 3127 buf_size_vals = 0; 3128 buf_size_idxs_is = 0; 3129 for (i=0;i<n_recvs;i++) { 3130 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3131 buf_size_vals += (PetscInt)olengths_vals[i]; 3132 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3133 } 3134 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 3135 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 3136 ierr = PetscMalloc(buf_size_idxs_is*sizeof(PetscInt),&recv_buffer_idxs_is);CHKERRQ(ierr); 3137 3138 /* get new tags for clean communications */ 3139 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3140 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3141 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3142 3143 /* allocate for requests */ 3144 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 3145 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 3146 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs_is);CHKERRQ(ierr); 3147 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 3148 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 3149 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs_is);CHKERRQ(ierr); 3150 3151 /* communications */ 3152 ptr_idxs = recv_buffer_idxs; 3153 ptr_vals = recv_buffer_vals; 3154 ptr_idxs_is = recv_buffer_idxs_is; 3155 for (i=0;i<n_recvs;i++) { 3156 source_dest = onodes[i]; 3157 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3158 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3159 ptr_idxs += olengths_idxs[i]; 3160 ptr_vals += olengths_vals[i]; 3161 if (nis) { 3162 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3163 ptr_idxs_is += olengths_idxs_is[i]; 3164 } 3165 } 3166 for (i=0;i<n_sends;i++) { 3167 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3168 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3169 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3170 if (nis) { 3171 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3172 } 3173 } 3174 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3175 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3176 3177 /* assemble new l2g map */ 3178 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3179 ptr_idxs = recv_buffer_idxs; 3180 buf_size_idxs = 0; 3181 for (i=0;i<n_recvs;i++) { 3182 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3183 ptr_idxs += olengths_idxs[i]; 3184 } 3185 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 3186 ptr_idxs = recv_buffer_idxs; 3187 buf_size_idxs = 0; 3188 for (i=0;i<n_recvs;i++) { 3189 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3190 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3191 ptr_idxs += olengths_idxs[i]; 3192 } 3193 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3194 ierr = ISLocalToGlobalMappingCreate(comm_n,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3195 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3196 3197 /* infer new local matrix type from received local matrices type */ 3198 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3199 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3200 if (n_recvs) { 3201 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3202 ptr_idxs = recv_buffer_idxs; 3203 for (i=0;i<n_recvs;i++) { 3204 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3205 new_local_type_private = MATAIJ_PRIVATE; 3206 break; 3207 } 3208 ptr_idxs += olengths_idxs[i]; 3209 } 3210 switch (new_local_type_private) { 3211 case MATDENSE_PRIVATE: 3212 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3213 new_local_type = MATSEQAIJ; 3214 bs = 1; 3215 } else { /* if I receive only 1 dense matrix */ 3216 new_local_type = MATSEQDENSE; 3217 bs = 1; 3218 } 3219 break; 3220 case MATAIJ_PRIVATE: 3221 new_local_type = MATSEQAIJ; 3222 bs = 1; 3223 break; 3224 case MATBAIJ_PRIVATE: 3225 new_local_type = MATSEQBAIJ; 3226 break; 3227 case MATSBAIJ_PRIVATE: 3228 new_local_type = MATSEQSBAIJ; 3229 break; 3230 default: 3231 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3232 break; 3233 } 3234 } else { /* by default, new_local_type is seqdense */ 3235 new_local_type = MATSEQDENSE; 3236 bs = 1; 3237 } 3238 3239 /* create MATIS object if needed */ 3240 if (reuse == MAT_INITIAL_MATRIX) { 3241 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3242 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3243 } else { 3244 /* it also destroys the local matrices */ 3245 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3246 } 3247 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3248 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3249 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3250 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3251 3252 /* set values */ 3253 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3254 ptr_vals = recv_buffer_vals; 3255 ptr_idxs = recv_buffer_idxs; 3256 for (i=0;i<n_recvs;i++) { 3257 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3258 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3259 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3260 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3261 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3262 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3263 } else { 3264 /* TODO */ 3265 } 3266 ptr_idxs += olengths_idxs[i]; 3267 ptr_vals += olengths_vals[i]; 3268 } 3269 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3270 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3271 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3272 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3273 3274 #if 0 3275 if (!restrict_comm) { /* check */ 3276 Vec lvec,rvec; 3277 PetscReal infty_error; 3278 3279 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3280 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3281 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3282 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3283 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3284 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3285 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3286 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3287 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3288 } 3289 #endif 3290 3291 /* assemble new additional is (if any) */ 3292 if (nis) { 3293 PetscInt **temp_idxs,*count_is,j,psum; 3294 3295 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3296 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3297 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3298 ptr_idxs = recv_buffer_idxs_is; 3299 psum = 0; 3300 for (i=0;i<n_recvs;i++) { 3301 for (j=0;j<nis;j++) { 3302 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3303 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3304 psum += plen; 3305 ptr_idxs += plen+1; /* shift pointer to received data */ 3306 } 3307 } 3308 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3309 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3310 for (i=1;i<nis;i++) { 3311 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3312 } 3313 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3314 ptr_idxs = recv_buffer_idxs_is; 3315 for (i=0;i<n_recvs;i++) { 3316 for (j=0;j<nis;j++) { 3317 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3318 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3319 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3320 ptr_idxs += plen+1; /* shift pointer to received data */ 3321 } 3322 } 3323 for (i=0;i<nis;i++) { 3324 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3325 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3326 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3327 } 3328 ierr = PetscFree(count_is);CHKERRQ(ierr); 3329 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3330 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3331 } 3332 /* free workspace */ 3333 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3334 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3335 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3336 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3337 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3338 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3339 if (isdense) { 3340 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3341 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3342 } else { 3343 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3344 } 3345 if (nis) { 3346 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3347 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3348 } 3349 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3350 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3351 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3352 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3353 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3354 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3355 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3356 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3357 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3358 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3359 ierr = PetscFree(onodes);CHKERRQ(ierr); 3360 if (nis) { 3361 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3362 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3363 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3364 } 3365 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3366 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3367 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3368 for (i=0;i<nis;i++) { 3369 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3370 } 3371 } 3372 PetscFunctionReturn(0); 3373 } 3374 3375 /* temporary hack into ksp private data structure */ 3376 #include <petsc-private/kspimpl.h> 3377 3378 #undef __FUNCT__ 3379 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3380 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3381 { 3382 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3383 PC_IS *pcis = (PC_IS*)pc->data; 3384 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3385 MatNullSpace CoarseNullSpace=NULL; 3386 ISLocalToGlobalMapping coarse_islg; 3387 IS coarse_is,*isarray; 3388 PetscInt i,im_active=-1,active_procs=-1; 3389 PetscInt nis,nisdofs,nisneu; 3390 PC pc_temp; 3391 PCType coarse_pc_type; 3392 KSPType coarse_ksp_type; 3393 PetscBool multilevel_requested,multilevel_allowed; 3394 PetscBool setsym,issym,isherm,isbddc,isnn,coarse_reuse; 3395 MatStructure matstruct; 3396 Mat t_coarse_mat_is; 3397 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3398 PetscMPIInt all_procs; 3399 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3400 PetscErrorCode ierr; 3401 3402 PetscFunctionBegin; 3403 /* Assign global numbering to coarse dofs */ 3404 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3405 PetscInt ocoarse_size; 3406 ocoarse_size = pcbddc->coarse_size; 3407 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3408 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3409 /* see if we can avoid some work */ 3410 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3411 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3412 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3413 coarse_reuse = PETSC_FALSE; 3414 } else { /* we can safely reuse already computed coarse matrix */ 3415 coarse_reuse = PETSC_TRUE; 3416 } 3417 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3418 coarse_reuse = PETSC_FALSE; 3419 } 3420 /* reset any subassembling information */ 3421 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3422 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3423 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3424 coarse_reuse = PETSC_TRUE; 3425 } 3426 3427 /* count "active" (i.e. with positive local size) and "void" processes */ 3428 im_active = !!(pcis->n); 3429 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3430 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3431 void_procs = all_procs-active_procs; 3432 csin_type_simple = PETSC_TRUE; 3433 if (pcbddc->current_level) { 3434 csin_ml = PETSC_TRUE; 3435 ncoarse_ml = void_procs; 3436 csin_ds = PETSC_TRUE; 3437 ncoarse_ds = void_procs; 3438 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3439 } else { 3440 csin_ml = PETSC_FALSE; 3441 ncoarse_ml = all_procs; 3442 if (void_procs) { 3443 csin_ds = PETSC_TRUE; 3444 ncoarse_ds = void_procs; 3445 csin_type_simple = PETSC_FALSE; 3446 } else { 3447 csin_ds = PETSC_FALSE; 3448 ncoarse_ds = all_procs; 3449 } 3450 } 3451 3452 /* 3453 test if we can go multilevel: three conditions must be satisfied: 3454 - we have not exceeded the number of levels requested 3455 - we can actually subassemble the active processes 3456 - we can find a suitable number of MPI processes where we can place the subassembled problem 3457 */ 3458 multilevel_allowed = PETSC_FALSE; 3459 multilevel_requested = PETSC_FALSE; 3460 if (pcbddc->current_level < pcbddc->max_levels) { 3461 multilevel_requested = PETSC_TRUE; 3462 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3463 multilevel_allowed = PETSC_FALSE; 3464 } else { 3465 multilevel_allowed = PETSC_TRUE; 3466 } 3467 } 3468 /* determine number of process partecipating to coarse solver */ 3469 if (multilevel_allowed) { 3470 ncoarse = ncoarse_ml; 3471 csin = csin_ml; 3472 } else { 3473 ncoarse = ncoarse_ds; 3474 csin = csin_ds; 3475 } 3476 3477 /* creates temporary l2gmap and IS for coarse indexes */ 3478 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3479 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3480 3481 /* creates temporary MATIS object for coarse matrix */ 3482 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3483 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3484 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3485 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3486 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3487 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3488 3489 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3490 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3491 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3492 const PetscInt *idxs; 3493 ISLocalToGlobalMapping tmap; 3494 3495 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3496 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3497 /* allocate space for temporary storage */ 3498 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3499 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3500 /* allocate for IS array */ 3501 nisdofs = pcbddc->n_ISForDofsLocal; 3502 nisneu = !!pcbddc->NeumannBoundariesLocal; 3503 nis = nisdofs + nisneu; 3504 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3505 /* dofs splitting */ 3506 for (i=0;i<nisdofs;i++) { 3507 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3508 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3509 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3510 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3511 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3512 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3513 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3514 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3515 } 3516 /* neumann boundaries */ 3517 if (pcbddc->NeumannBoundariesLocal) { 3518 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3519 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3520 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3521 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3522 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3523 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3524 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3525 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3526 } 3527 /* free memory */ 3528 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3529 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3530 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3531 } else { 3532 nis = 0; 3533 nisdofs = 0; 3534 nisneu = 0; 3535 isarray = NULL; 3536 } 3537 /* destroy no longer needed map */ 3538 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3539 3540 /* restrict on coarse candidates (if needed) */ 3541 coarse_mat_is = NULL; 3542 if (csin) { 3543 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3544 PetscInt j,tissize,*nisindices; 3545 PetscInt *coarse_candidates; 3546 const PetscInt* tisindices; 3547 /* get coarse candidates' ranks in pc communicator */ 3548 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3549 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3550 for (i=0,j=0;i<all_procs;i++) { 3551 if (!coarse_candidates[i]) { 3552 coarse_candidates[j]=i; 3553 j++; 3554 } 3555 } 3556 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3557 /* get a suitable subassembling pattern */ 3558 if (csin_type_simple) { 3559 PetscMPIInt rank; 3560 PetscInt issize,isidx; 3561 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3562 if (im_active) { 3563 issize = 1; 3564 isidx = (PetscInt)rank; 3565 } else { 3566 issize = 0; 3567 isidx = -1; 3568 } 3569 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3570 } else { 3571 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3572 } 3573 if (pcbddc->dbg_flag) { 3574 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3575 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3576 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3577 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3578 for (i=0;i<j;i++) { 3579 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3580 } 3581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3583 } 3584 /* shift the pattern on coarse candidates */ 3585 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3586 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3587 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3588 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3589 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3590 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3591 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3592 } 3593 if (pcbddc->dbg_flag) { 3594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3596 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3597 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3598 } 3599 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3600 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3601 } else { 3602 if (pcbddc->dbg_flag) { 3603 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3604 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3605 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3606 } 3607 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3608 coarse_mat_is = t_coarse_mat_is; 3609 } 3610 3611 /* create local to global scatters for coarse problem */ 3612 if (pcbddc->new_primal_space) { 3613 PetscInt lrows; 3614 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3615 if (coarse_mat_is) { 3616 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3617 } else { 3618 lrows = 0; 3619 } 3620 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3621 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3622 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3623 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3624 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3625 } 3626 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3627 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3628 3629 /* set defaults for coarse KSP and PC */ 3630 if (multilevel_allowed) { 3631 coarse_ksp_type = KSPRICHARDSON; 3632 coarse_pc_type = PCBDDC; 3633 } else { 3634 coarse_ksp_type = KSPPREONLY; 3635 coarse_pc_type = PCREDUNDANT; 3636 } 3637 3638 /* print some info if requested */ 3639 if (pcbddc->dbg_flag) { 3640 if (!multilevel_allowed) { 3641 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3642 if (multilevel_requested) { 3643 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3644 } else if (pcbddc->max_levels) { 3645 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3646 } 3647 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3648 } 3649 } 3650 3651 /* create the coarse KSP object only once with defaults */ 3652 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3653 issym = PETSC_FALSE; 3654 isherm = PETSC_FALSE; 3655 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3656 ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr); 3657 if (coarse_mat_is) { 3658 MatReuse coarse_mat_reuse; 3659 PetscViewer dbg_viewer; 3660 if (pcbddc->dbg_flag) { 3661 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3662 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3663 } 3664 if (!pcbddc->coarse_ksp) { 3665 char prefix[256],str_level[16]; 3666 size_t len; 3667 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3668 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3669 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3670 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is,matstruct);CHKERRQ(ierr); 3671 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3672 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3673 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3674 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3675 /* prefix */ 3676 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3677 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3678 if (!pcbddc->current_level) { 3679 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3680 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3681 } else { 3682 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3683 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3684 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3685 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3686 *(prefix+len)='\0'; 3687 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3688 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3689 } 3690 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3691 /* allow user customization */ 3692 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3693 } 3694 3695 /* get some info after set from options */ 3696 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3697 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3698 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3699 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3700 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3701 isbddc = PETSC_FALSE; 3702 } 3703 3704 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3705 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3706 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3707 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3708 if (nisdofs) { 3709 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3710 for (i=0;i<nisdofs;i++) { 3711 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3712 } 3713 } 3714 if (nisneu) { 3715 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3716 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3717 } 3718 3719 /* assemble coarse matrix */ 3720 if (coarse_reuse) { 3721 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3722 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3723 coarse_mat_reuse = MAT_REUSE_MATRIX; 3724 } else { 3725 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3726 } 3727 if (isbddc || isnn) { 3728 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3729 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3730 if (pcbddc->dbg_flag) { 3731 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3732 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3733 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3734 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3735 } 3736 } 3737 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3738 } else { 3739 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3740 } 3741 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3742 3743 /* propagate symmetry info to coarse matrix */ 3744 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3745 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3746 3747 /* set operators */ 3748 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3749 if (pcbddc->dbg_flag) { 3750 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3751 } 3752 } else { /* processes non partecipating to coarse solver (if any) */ 3753 coarse_mat = 0; 3754 } 3755 ierr = PetscFree(isarray);CHKERRQ(ierr); 3756 3757 /* Compute coarse null space (special handling by BDDC only) */ 3758 if (pcbddc->NullSpace) { 3759 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3760 } 3761 3762 if (pcbddc->coarse_ksp) { 3763 Vec crhs,csol; 3764 PetscBool ispreonly; 3765 if (CoarseNullSpace) { 3766 if (isbddc) { 3767 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3768 } else { 3769 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3770 } 3771 } 3772 /* setup coarse ksp */ 3773 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3774 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3775 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3776 /* hack */ 3777 if (!csol) { 3778 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3779 } 3780 if (!crhs) { 3781 ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3782 } 3783 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3784 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3785 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3786 KSP check_ksp; 3787 KSPType check_ksp_type; 3788 PC check_pc; 3789 Vec check_vec,coarse_vec; 3790 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3791 PetscInt its; 3792 PetscBool compute_eigs; 3793 PetscReal *eigs_r,*eigs_c; 3794 PetscInt neigs; 3795 3796 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3797 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3798 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3799 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3800 if (ispreonly) { 3801 check_ksp_type = KSPPREONLY; 3802 compute_eigs = PETSC_FALSE; 3803 } else { 3804 check_ksp_type = KSPGMRES; 3805 compute_eigs = PETSC_TRUE; 3806 } 3807 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3808 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3809 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3810 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3811 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3812 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3813 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3814 /* create random vec */ 3815 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3816 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3817 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3818 if (CoarseNullSpace) { 3819 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3820 } 3821 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3822 /* solve coarse problem */ 3823 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3824 if (CoarseNullSpace) { 3825 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3826 } 3827 /* set eigenvalue estimation if preonly has not been requested */ 3828 if (compute_eigs) { 3829 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3830 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3831 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3832 lambda_max = eigs_r[neigs-1]; 3833 lambda_min = eigs_r[0]; 3834 if (pcbddc->use_coarse_estimates) { 3835 if (lambda_max>lambda_min) { 3836 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3837 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3838 } 3839 } 3840 } 3841 3842 /* check coarse problem residual error */ 3843 if (pcbddc->dbg_flag) { 3844 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3845 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3846 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3847 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3848 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3849 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3850 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3851 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3852 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3853 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3854 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3855 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3856 if (compute_eigs) { 3857 PetscReal lambda_max_s,lambda_min_s; 3858 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3859 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3860 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3861 for (i=0;i<neigs;i++) { 3862 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3863 } 3864 } 3865 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3866 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3867 } 3868 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3869 if (compute_eigs) { 3870 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3871 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3872 } 3873 } 3874 } 3875 /* print additional info */ 3876 if (pcbddc->dbg_flag) { 3877 /* waits until all processes reaches this point */ 3878 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3879 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3880 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3881 } 3882 3883 /* free memory */ 3884 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3885 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3886 PetscFunctionReturn(0); 3887 } 3888 3889 #undef __FUNCT__ 3890 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3891 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3892 { 3893 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3894 PC_IS* pcis = (PC_IS*)pc->data; 3895 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3896 PetscInt i,coarse_size; 3897 PetscInt *local_primal_indices; 3898 PetscErrorCode ierr; 3899 3900 PetscFunctionBegin; 3901 /* Compute global number of coarse dofs */ 3902 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3903 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3904 } 3905 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3906 3907 /* check numbering */ 3908 if (pcbddc->dbg_flag) { 3909 PetscScalar coarsesum,*array; 3910 3911 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3912 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3913 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3914 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3915 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3916 for (i=0;i<pcbddc->local_primal_size;i++) { 3917 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3918 } 3919 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3920 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3921 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3922 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3923 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3924 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3925 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3926 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3927 for (i=0;i<pcis->n;i++) { 3928 if (array[i] == 1.0) { 3929 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3930 } 3931 } 3932 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3933 for (i=0;i<pcis->n;i++) { 3934 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3935 } 3936 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3937 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3938 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3939 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3940 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3941 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3942 if (pcbddc->dbg_flag > 1) { 3943 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3946 for (i=0;i<pcbddc->local_primal_size;i++) { 3947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3948 } 3949 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3950 } 3951 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3952 } 3953 /* get back data */ 3954 *coarse_size_n = coarse_size; 3955 *local_primal_indices_n = local_primal_indices; 3956 PetscFunctionReturn(0); 3957 } 3958 3959 #undef __FUNCT__ 3960 #define __FUNCT__ "PCBDDCGlobalToLocal" 3961 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3962 { 3963 PC_IS* pcis = (PC_IS*)pc->data; 3964 IS localis_t; 3965 PetscInt i,lsize,*idxs; 3966 PetscScalar *vals; 3967 PetscErrorCode ierr; 3968 3969 PetscFunctionBegin; 3970 /* get dirichlet indices in local ordering exploiting local to global map */ 3971 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3972 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3973 for (i=0;i<lsize;i++) vals[i] = 1.0; 3974 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3975 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3976 if (idxs) { /* multilevel guard */ 3977 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3978 } 3979 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3980 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3981 ierr = PetscFree(vals);CHKERRQ(ierr); 3982 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3983 /* now compute dirichlet set in local ordering */ 3984 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3985 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3986 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3987 for (i=0,lsize=0;i<pcis->n;i++) { 3988 if (vals[i] == 1.0) { 3989 lsize++; 3990 } 3991 } 3992 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3993 for (i=0,lsize=0;i<pcis->n;i++) { 3994 if (vals[i] == 1.0) { 3995 idxs[lsize++] = i; 3996 } 3997 } 3998 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3999 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4000 *localis = localis_t; 4001 PetscFunctionReturn(0); 4002 } 4003